This R code includes the analyses and figures of the CPS1000 manuscript, written by Kevin D. Hofer and Junyan Lu.
library(knitr)
options(digits=3, width=80)
opts_chunk$set(echo=TRUE,tidy=FALSE,include=TRUE)
library(ggpmisc)
suppressMessages(library(RColorBrewer))
suppressMessages(library(ggrepel))
suppressMessages(library(cowplot))
suppressMessages(library(gridExtra))
suppressMessages(library(drc))
suppressMessages(library(tidyverse))
detach("package:dplyr")
suppressMessages(library(dplyr))
library("survminer")
library("survival")
library("maxstat")
library("patchwork")
library("glmnet")
library("corrplot")
library("writexl")
library("ggplot2")
library("flextable")
library("ggpubr")
library("ComplexHeatmap")
library("circlize")
library("grid")
library("RColorBrewer")
library("ggplotify")
library("magrittr")
library("Rtsne")
library("paletteer")
library("ggtext")
library("ggh4x")
library("plotrix")
library("scales")
library("corrplot")
library("gridGraphics")
library("draw")
library("stringr")
library("ggsurvfit")
library("BloodCancerMultiOmics2017")
library("gtable")
library("statebins")
library("DESeq2")
library("apeglm")
library("fgsea")
library("msigdbr")
library("biomaRt")
library("org.Hs.eg.db")
library("ggtext")
library("flextable")
library("data.table")
library("readxl")
#maintain function of specific R packages
rename <- dplyr::rename
count <- dplyr::count
filter <- dplyr::filter
theme_figures <- theme_classic()+
theme(plot.title = element_text(size=8, hjust=0.5, face="bold"),
axis.text = element_text(color="black", size=8),
axis.title = element_text(size=8),
axis.line = element_line(size=0.5),
legend.title = element_text(size=8, face="bold"),
legend.text = element_text(size=8),
legend.key.size = unit(0.5, 'cm'))
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
theme_figures_angle45_x <- theme_classic()+
theme(plot.title=element_text(size=8, hjust=0.5, face="bold"),
axis.text.x = element_text(color="black", angle = 45, hjust=1, vjust=1, size=8),
axis.text.y = element_text(size=8),
axis.title = element_text(size=8),
legend.title = element_text(size=8, face="bold"),
legend.text = element_text(size=8),
axis.line = element_line(size=0.5),
legend.key.size = unit(0.5, 'cm'))
theme_figures_facet <- theme_bw() +
theme(strip.text.y = element_text(size = 0),
strip.text.x = element_text(size=8),
strip.background.y = element_rect(color=NA),
axis.text = element_text(size=8, color="black"),
axis.title = element_text(size=8, color="black"),
panel.spacing = unit(0.15, "lines"),
legend.position = "none",
panel.border = element_rect(color = "black", linewidth = 0.75, fill = NA),
panel.grid = element_blank(),
strip.background = element_rect(color = "black", linewidth = 0.75, fill = "grey90"),
plot.title = element_text(hjust=0.5, size=8, face="bold"))
theme_figures_facet_angle60_x <- theme_bw() +
theme(strip.text.y = element_text(size = 0),
strip.text.x = element_text(size=8),
strip.background.y = element_rect(color=NA),
axis.title = element_text(size=8),
axis.text.y = element_text(size =8, color="black"),
axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1, size=8, color = "black"),
panel.spacing = unit(0.15, "lines"),
legend.title = element_text(size=8, face="bold"),
legend.text=element_text(size=8),
legend.key.size = unit(0.5, 'cm'),
panel.border = element_rect(color = "black", linewidth = 0.75, fill = NA),
strip.background = element_rect(color = "black", linewidth = 0.75, fill = "grey90"),
plot.title = element_text(hjust=0.5, size=8, face="bold"),
strip.text = element_text(size = 8))
theme_figures_facet_angle45_x <- theme_figures_facet +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))
theme_figures_facet_angle45_x_legend <- theme_figures_facet_angle45_x +
theme(legend.position = "right",
legend.title = element_text(size=8, face="bold"),
legend.text=element_text(size=8),
legend.key.size = unit(0.5, 'cm'))
theme_figures_border <- theme_bw() +
theme(axis.text = element_text(color="black", size=8),
axis.title = element_text(size=8),
legend.title = element_text(size=8, face="bold"),
legend.text=element_text(size=8),
legend.key.size = unit(0.5, 'cm'),
plot.title = element_text(hjust=0.5, size=8, face="bold"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = 'black',
fill = NA,
size = 1))
## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
#set colors
#diagnosis
colors_diag <- setNames(c("#1F78B4", "#B2DF8A", "#A6CEE3", "grey", "#E31A1C", "#FB9A99", "#FF7F00", "#CAB2D6"), c("MCL", "CLL", "T-PLL", "other", "AML", "T-ALL", "B-ALL", "B-PLL"))
colors_diag_IGHV <- setNames(c("#1F78B4", "#B2DF8A", "#33A02C", "grey80", "#E31A1C", "#FB9A99", "#FF7F00", "#CAB2D6", "#A6CEE3"), c("MCL", "U-CLL", "M-CLL", "other", "AML", "T-ALL", "B-ALL", "B-PLL", "T-PLL"))
#pathways
colors_pathways_mod <- setNames(c("lightgrey", "#BC80BD", "#FCCDE5", "#B3DE69", "#FDB462", "#80B1D3", "#FB8072", "#BEBADA", "#FFFFB3", "#8DD3C7"), c("other", "AKT/mTOR", "DNA damage response", "Chemotherapy" , "MAPK" , "B-cell receptor", "PI3K", "Stress pathway", "Bromodomain/PLK", "JAK/STAT"))
colors_pathways_mod2 <- setNames(c("lightgrey", "#FCCDE5", "#B3DE69", "#FDB462", "#80B1D3", "#FB8072", "#BEBADA", "#FFFFB3", "#8DD3C7", "#A6CEE3", "#6DBD57", "#DE9E83", "#BC80BD"), c("other", "DNA damage response", "Chemotherapy" , "MAPK" , "B-cell receptor", "PI3K", "Stress pathway", "Bromodomain/PLK", "JAK/STAT", "Apoptosis", "Cell cycle control", "Histone methylation", "PI3K/AKT/mTOR"))
colors_cor_heatmap <- setNames(c("#BC80BD", "#FDB462", "#80B1D3", "#FB8072", "#BEBADA"), c("AKT/mTOR", "MEK/ERK" , "B-cell receptor", "PI3K", "Stress pathway"))
#mode of action
colors_drug_type <- c("Chemo" = "#9a6a3d", "Kinase" = "#6A3D9A", "Other" = "#3d9a6a")
labels_drug_type <- c("Chemo" = "Chemotherapy", "Kinase" ="Kinase inhibitor", "Other" = "Other")
#genetics
colors_ddx3x <- setNames(c("#E78AC3", "grey60"), c(1, 0))
colors_del11q <- setNames(c("#E5C494", "grey60"), c(1, 0))
#longitudinal analysis
colors_treatment <- setNames(c("black", "salmon"), c("sample1_value", "sample2_value"))
#cox regression models
colors_cox <- setNames(c("#2171b5", "#cb181d"), c("TTT", "OS"))
colors_ibr_viab <- setNames(c("#CA054D", "#B96D40", "#A4D4B4", "#3B1C32"),
c("U-CLL, low ibrutinib viability", "U-CLL, high ibrutinib viability", "M-CLL, low ibrutinib viability", "M-CLL, high ibrutinib viability"))
#function for t-test
tTest <- function(val, fac) {
res <- t.test(val ~ fac, var.equal = TRUE)
data.frame(p = res$p.value,
mean1 = res$estimate[[1]],
mean2 = res$estimate[[2]],
diff = res$estimate[[2]] - res$estimate[[1]])
}
#helper function for mean viability boxplots
create_boxplot <- function(data, diagnosis_filter, colors, title) {
#order drugs by median viability
drug_order <- data |>
filter(diagnosis == diagnosis_filter) |>
group_by(drug) |>
summarise(median = median(mean_viab)) |>
arrange(desc(median)) |>
pull(drug)
#filter and reorder data
plot_data <- data |>
filter(diagnosis == diagnosis_filter) |>
mutate(drug = factor(drug, levels = drug_order))
#create plot
plot_data |>
filter(diagnosis == diagnosis_filter) |>
ggplot(aes(x=mean_viab, y=drug, color=diagnosis))+
geom_boxplot(alpha=0.8, outlier.shape=NA) +
geom_jitter(alpha=0.2, size=0.2, width = 0.15)+
scale_x_continuous(name ="Mean viability", breaks=seq(0,1.2, 0.2), limits=c(0,1.2))+
theme_bw() +
theme(
axis.text = element_text(color = "black", size = 8),
axis.title = element_text(size = 8),
plot.title = element_text(size = 8, hjust = 0.5, face = "bold"),
legend.position = "none",
panel.border = element_rect(color = "black", linewidth = 1, fill = NA)) +
labs(y = "", x = "Mean viability", title = title) +
scale_color_manual(values = colors_diag_IGHV)
}
#sigmoid curve fitting
testF <- function(m0, m1) {
Fstat <- ((sum(residuals(m0)^2) - sum(residuals(m1)^2))/(m0$df.residual-m1$df.residual))/(sum(residuals(m1)^2)/m1$df.residual)
1 - pf(Fstat, m0$df.residual-m1$df.residual, m1$df.residual)
}
calcAUC <- function(m1, minConc =0, maxConc=15, n=100) {
concSeq <- data.frame(conc = seq(minConc, maxConc, length.out = n))
valueConc <- data.frame(viab = predict(m1, concSeq),
conc = concSeq[,1])
valueConc <- valueConc[order(valueConc$conc),]
areaTotal <- 0
for (i in seq(nrow(valueConc)-1)) {
areaEach <- (valueConc$viab[i] + valueConc$viab[i+1])*(valueConc$conc[i+1]-valueConc$conc[i])*0.5
areaTotal <- areaTotal + areaEach
}
areaTotal/(valueConc[nrow(valueConc),]$conc-valueConc[1,]$conc)
}
calcParm <- function(df_input) {
df <- as.data.frame(df_input)
df$conc <- as.numeric(df$conc)
df$viab <- as.numeric(df$viab)
formula_obj <- viab ~ conc
out <- tryCatch({
m1 <- eval(substitute(
drm(formula_obj,
data = df,
fct = LL2.3u(),
robust = "tukey",
lowerl = c(0, 0, NA),
upperl = c(4, 1, NA)),
list(formula_obj = formula_obj, df = df)
))
fitRes <- m1$coefficients
m0 <- lm(viab ~ 1, data = df)
pred_vals <- predict(m1)
r2 <- cor(pred_vals, df$viab, use = "pairwise.complete.obs")^2
auc <- calcAUC(m1)
tibble(
HS = fitRes[[1]],
Einf = fitRes[[2]],
IC50 = fitRes[[3]],
pF = testF(m0, m1),
R2 = r2,
AUC = auc
)
}, error = function(e) {
message("Fit failed for a group: ", e$message)
tibble(
HS = NA_real_,
Einf = NA_real_,
IC50 = NA_real_,
pF = NA_real_,
R2 = NA_real_,
AUC = NA_real_
)
})
return(out)
}
#function to clean and combine multi-omics data
generateData <- function(inclSet, onlyCombine = FALSE, censor = NULL, robust = FALSE) {
dataScale <- function(x, censor = NULL, robust = FALSE) {
#function to scale different variables
if (length(unique(na.omit(x))) == 2){
#a binary variable, change to -0.5 and 0.5 for 1 and 2
x - 0.5
} else if (length(unique(na.omit(x))) == 3) {
#catagorical varialbe with 3 levels, methylation_cluster, change to -0.5,0,0.5
(x - 1)/2
} else {
if (robust) {
#continuous variable, centered by median and divied by 2*mad
mScore <- (x-median(x,na.rm=TRUE))/(1.4826*mad(x,na.rm=TRUE))
if (!is.null(censor)) {
mScore[mScore > censor] <- censor
mScore[mScore < -censor] <- -censor
}
mScore/2
} else {
mScore <- (x-mean(x,na.rm=TRUE))/(sd(x,na.rm=TRUE))
if (!is.null(censor)) {
mScore[mScore > censor] <- censor
mScore[mScore < -censor] <- -censor
}
mScore/2
}
}
}
allResponse <- list() #list for storing response vector (drug viability)
allExplain <- list() #list for storing explain matrices (multi-omics)
for (drug in colnames(inclSet$drugs)) {
y <- inclSet$drugs[,drug]
#get overlapped samples for each dataset
overSample <- names(y)
for (eachSet in inclSet) {
overSample <- intersect(overSample,rownames(eachSet))
}
y <- dataScale(y[overSample], censor = censor, robust = robust)
allResponse[[drug]] <- y
}
#generate explanatory variable table for each seahorse measurement
expTab <- list()
if ("gen" %in% names(inclSet)) {
geneTab <- inclSet$gen[overSample,]
#at least 3 mutated sample
geneTab <- geneTab[, colSums(geneTab) >= 3]
vecName <- sprintf("genetic(%s)", ncol(geneTab))
expTab[[vecName]] <- apply(geneTab,2,dataScale)
}
if ("RNA" %in% names(inclSet)){
#for PCA
rnaPCA <- inclSet$RNA[overSample, ]
colnames(rnaPCA) <- paste0("con.expression",colnames(rnaPCA), sep = "")
expTab[["expression(20)"]] <- apply(rnaPCA,2,dataScale, censor = censor, robust = robust)
}
if ("meth" %in% names(inclSet)){
methPCA <- inclSet$meth[overSample,]
colnames(methPCA) <- paste("con.methylation",colnames(methPCA),sep = "")
expTab[["methylation(20)"]] <- apply(methPCA[,1:20],2,dataScale, censor = censor, robust = robust)
}
if ("IGHV" %in% names(inclSet)) {
IGHVtab <- inclSet$IGHV[overSample,,drop=FALSE]
expTab[["IGHV(1)"]] <- apply(IGHVtab,2,dataScale)
}
if ("Mcluster" %in% names(inclSet)) {
methTab <- inclSet$Mcluster[overSample,,drop=FALSE]
colnames(methTab) <- "methylation cluster"
expTab[["methCluster(1)"]] <- apply(methTab,2,dataScale)
}
if ("pretreated" %in% names(inclSet)){
preTab <- inclSet$pretreated[overSample,,drop=FALSE]
expTab[["pretreated(1)"]] <- apply(preTab,2,dataScale)
}
comboTab <- c()
for (eachSet in names(expTab)){
comboTab <- cbind(comboTab, expTab[[eachSet]])
}
vecName <- sprintf("all(%s)", ncol(comboTab))
expTab[[vecName]] <- comboTab
allExplain <- expTab
if (onlyCombine) {
#only return combined results, for feature selection
allExplain <-expTab[length(expTab)]
}
return(list(allResponse=allResponse, allExplain=allExplain))
}
#function for multi-variant regression
runGlm <- function(X, y, method = "ridge", repeats=20, folds = 3) {
modelList <- list()
lambdaList <- c()
varExplain <- c()
coefMat <- matrix(NA, ncol(X), repeats)
rownames(coefMat) <- colnames(X)
if (method == "lasso"){
alpha = 1
} else if (method == "ridge") {
alpha = 0
}
for (i in seq(repeats)) {
if (ncol(X) > 2) {
res <- cv.glmnet(X,y, type.measure = "mse", family="gaussian",
nfolds = folds, alpha = alpha, standardize = FALSE)
lambdaList <- c(lambdaList, res$lambda.min)
modelList[[i]] <- res
coefModel <- coef(res, s = "lambda.min")[-1] #remove intercept row
coefMat[,i] <- coefModel
#calculate variance explained
y.pred <- predict(res, s = "lambda.min", newx = X)
varExp <- cor(as.vector(y),as.vector(y.pred))^2
varExplain[i] <- ifelse(is.na(varExp), 0, varExp)
} else {
fitlm<-lm(y~., data.frame(X))
varExp <- summary(fitlm)$r.squared
varExplain <- c(varExplain, varExp)
}
}
list(modelList = modelList, lambdaList = lambdaList, varExplain = varExplain, coefMat = coefMat)
}
#function to calculate pair-wise mean viability differences
create_mean_differences <- function(treatments_df, viab_df) {
# Get drug columns
drug_cols <- setdiff(names(viab_df), c("patientID", "sampleID", "sampleDate"))
# Get sample1 values
sample1_long <- treatments_df %>%
left_join(viab_df, by = c("sample1" = "sampleID")) %>%
dplyr::select(patientID.x, pair, sample1, sample2, status, therapy, all_of(drug_cols)) %>%
rename(patientID = patientID.x) %>%
pivot_longer(cols = all_of(drug_cols), names_to = "drug", values_to = "sample1_value")
# Get sample2 values
sample2_long <- treatments_df %>%
left_join(viab_df, by = c("sample2" = "sampleID")) %>%
dplyr::select(patientID.x, pair, all_of(drug_cols)) %>%
rename(patientID = patientID.x) %>%
pivot_longer(cols = all_of(drug_cols), names_to = "drug", values_to = "sample2_value")
# Combine and calculate differences
differences_long <- sample1_long %>%
left_join(sample2_long, by = c("patientID", "pair", "drug")) %>%
mutate(
difference = sample2_value - sample1_value,
fold_change = sample2_value / sample1_value,
percent_change = ((sample2_value - sample1_value) / sample1_value) * 100
)
return(differences_long)
}
#function to calculate pair-wise single concentration viability differences
create_conc_differences <- function(treatments_df, viab_df) {
# Get drug columns
drug_cols <- setdiff(names(merged_table_wide), c("patientID", "sampleID", "sampleDate", "conc_index"))
# Get sample1 values
sample1_long <- treatments_df %>%
left_join(viab_df, by = c("sample1" = "sampleID")) %>%
dplyr::select(patientID.x, pair, sample1, sample2, status, therapy, conc_index, all_of(drug_cols)) %>%
rename(patientID = patientID.x) %>%
pivot_longer(cols = all_of(drug_cols), names_to = "drug", values_to = "sample1_value")
# Get sample2 values
sample2_long <- treatments_df %>%
left_join(viab_df, by = c("sample2" = "sampleID")) %>%
dplyr::select(patientID.x, pair, conc_index, all_of(drug_cols)) %>%
rename(patientID = patientID.x) %>%
pivot_longer(cols = all_of(drug_cols), names_to = "drug", values_to = "sample2_value")
# Combine and calculate differences
differences_long <- sample1_long %>%
left_join(sample2_long, by = c("patientID", "pair", "drug", "conc_index")) %>%
mutate(
difference = sample2_value - sample1_value,
fold_change = sample2_value / sample1_value,
percent_change = ((sample2_value - sample1_value) / sample1_value) * 100
)
return(differences_long)
}
#function for univariate cox regression
com_uni <- function(response, time, endpoint, scale =FALSE) {
if (scale) {
#calculate z-score
response <- (response - mean(response, na.rm = TRUE))/sd(response, na.rm=TRUE)
}
tryCatch({
surv <- coxph(Surv(time, endpoint) ~ response)
resTab <- tibble(p = summary(surv)[[7]][,5],
HR = summary(surv)[[7]][,2],
lower = summary(surv)[[8]][,3],
higher = summary(surv)[[8]][,4])
}, error = function(err) {
resTab <- tibble(p = NA,
HR = NA,
lower = NA,
higher = NA)
})
}
#function for multivariate cox regression
com <- function(response, ighv, TP53_del17p, age, time, endpoint, scale =FALSE) {
if (scale) {
#calculate z-score
response <- (response - mean(response, na.rm = TRUE))/sd(response, na.rm=TRUE)
age <- (age - mean(age, na.rm = TRUE))/sd(age, na.rm=TRUE)
}
tryCatch({
surv <- coxph(Surv(time, endpoint) ~ response+ighv+TP53_del17p+age)
resTab <- tibble(variable = c("response", "ighv", "TP53_del17p", "age"),
p = summary(surv)[[7]][,5],
HR = summary(surv)[[7]][,2],
lower = summary(surv)[[8]][,3],
higher = summary(surv)[[8]][,4])
}, error = function(err) {
resTab <- tibble(variable = c("response", "ighv", "TP53_del17p", "age"),
p = NA,
HR = NA,
lower = NA,
higher = NA)
})
}
#function for the lasso heatmap plot
lassoPlot <- function(lassoOut, cleanData, freqCut = 1, coefCut = 0.01, setNumber = "last") {
plotList <- list()
if (setNumber == "last") {
setNumber <- length(lassoOut[[1]])
} else {
setNumber <- setNumber
}
for (seaName in names(lassoOut)) {
#for the barplot on the left of the heatmap
barValue <- rowMeans(lassoOut[[seaName]][[setNumber]]$coefMat)
freqValue <- rowMeans(abs(sign(lassoOut[[seaName]][[setNumber]]$coefMat)))
barValue <- barValue[abs(barValue) >= coefCut & freqValue >= freqCut] # a certain threshold
barValue <- barValue[order(barValue)]
if(length(barValue) == 0) {
plotList[[seaName]] <- NA
next
}
#for the heatmap and scatter plot below the heatmap
allData <- cleanData$allExplain[[setNumber]]
seaValue <- cleanData$allResponse[[seaName]]*2 #back to Z-score
tabValue <- allData[, names(barValue),drop=FALSE]
ord <- order(seaValue)
seaValue <- seaValue[ord]
tabValue <- tabValue[ord, ,drop=FALSE]
sampleIDs <- rownames(tabValue)
tabValue <- as.tibble(tabValue)
#change scaled binary back to catagorical
for (eachCol in colnames(tabValue)) {
if (strsplit(eachCol, split = "[.]")[[1]][1] != "con") {
tabValue[[eachCol]] <- as.integer(as.factor(tabValue[[eachCol]]))
}
else {
tabValue[[eachCol]] <- tabValue[[eachCol]]*2 #back to Z-score
}
}
tabValue$Sample <- sampleIDs
#Mark different rows for different scaling in heatmap
matValue <- gather(tabValue, key = "Var",value = "Value", -Sample)
matValue$Type <- "mut"
#For continuious value
matValue$Type[grep("con.",matValue$Var)] <- "con"
#for methylation_cluster
matValue$Type[grep("cluster",matValue$Var)] <- "meth"
#change the scale of the value, let them do not overlap with each other
matValue[matValue$Type == "mut",]$Value = matValue[matValue$Type == "mut",]$Value + 10
matValue[matValue$Type == "meth",]$Value = matValue[matValue$Type == "meth",]$Value + 20
#color scale for viability
idx <- matValue$Type == "con"
myCol <- colorRampPalette(c('dark red','white','dark blue'),
space = "Lab")
if (sum(idx) != 0) {
matValue[idx,]$Value = round(matValue[idx,]$Value,digits = 2)
minViab <- min(matValue[idx,]$Value)
maxViab <- max(matValue[idx,]$Value)
limViab <- max(c(abs(minViab), abs(maxViab)))
scaleSeq1 <- round(seq(-limViab, limViab,0.01), digits=2)
color4viab <- setNames(myCol(length(scaleSeq1+1)), nm=scaleSeq1)
} else {
scaleSeq1 <- round(seq(0,1,0.01), digits=2)
color4viab <- setNames(myCol(length(scaleSeq1+1)), nm=scaleSeq1)
}
#change continues measurement to discrete measurement
matValue$Value <- factor(matValue$Value,levels = sort(unique(matValue$Value)))
#change order of heatmap
names(barValue) <- gsub("con.", "", names(barValue))
matValue$Var <- gsub("con.","",matValue$Var)
matValue$Var <- factor(matValue$Var, levels = names(barValue))
matValue$Sample <- factor(matValue$Sample, levels = names(seaValue))
#plot the heatmap
p1 <- ggplot(matValue, aes(x=Sample, y=Var)) + geom_tile(aes(fill=Value), color = "gray") +
theme_bw() + scale_y_discrete(expand=c(0,0)) +
theme(axis.text.y=element_text(hjust=0, size=8, color="black"), axis.text.x=element_blank(),
axis.ticks=element_blank(), panel.border=element_rect(colour="gainsboro"),
plot.title=element_text(size=8, color="black", face="bold"),
panel.background=element_blank(), panel.grid.major=element_blank(),
panel.grid.minor=element_blank()) +
xlab("patients") + ylab("") + scale_fill_manual(name="Mutated", values=c(color4viab, `11`="gray96", `12`='black', `21`='#DEEBF7', `22`='#9ECAE1',`23` = '#3182BD'),guide=FALSE) + ggtitle(seaName)
#Plot the bar plot on the left of the heatmap
barDF = data.frame(barValue, nm=factor(names(barValue),levels=names(barValue)))
p2 <- ggplot(data=barDF, aes(x=nm, y=barValue)) +
geom_bar(stat="identity", fill="lightblue", position = "identity", width=.8, alpha=0.75) +
theme_bw() + geom_hline(yintercept=0, size=0.5) + scale_x_discrete(expand=c(0,0.5)) +
scale_y_continuous(expand=c(0,0)) + coord_flip(ylim=c(min(barValue),max(barValue))) +
theme(panel.grid.major=element_blank(), panel.background=element_blank(), axis.ticks.y = element_blank(),
panel.grid.minor = element_blank(), axis.text=element_text(size=8, color="black"), panel.border=element_blank(), axis.line.x = element_line(color="black", size=0.5))+
xlab("") + ylab("") + geom_vline(xintercept=c(0.5), color="black")
#Plot the scatter plot under the heatmap
# scatterplot below
scatterDF = data.frame(X=factor(names(seaValue), levels=names(seaValue)), Y=seaValue)
p3 <- ggplot(scatterDF, aes(x=X, y=Y)) + geom_point(shape=21, fill="grey80", colour="black", size=1.2) + theme_bw() +
theme(panel.grid.minor=element_blank(), panel.grid.major.x=element_blank(), axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank(), axis.text.y=element_text(size=8, color="black"), panel.border=element_rect(colour="dimgrey", size=0.1), panel.background=element_rect(fill="gray96"))
#Scale bar for continuous variable
Vgg = ggplot(data=data.frame(x=1, y=as.numeric(names(color4viab))), aes(x=x, y=y, color=y)) + geom_point() +
scale_color_gradientn(name="Z-score", colours =color4viab) + theme(legend.title=element_text(size=12), legend.text=element_text(size=10))
#Assemble all the plots togehter
# construct the gtable
wdths = c(1.5, 0.2, 1.3*ncol(matValue), 0.7, 1)
hghts = c(0.1, 0.0010*nrow(matValue), 0.16, 0.5)
gt = gtable(widths=unit(wdths, "in"), heights=unit(hghts, "in"))
## make grobs
gg1 = ggplotGrob(p1)
gg2 = ggplotGrob(p2)
gg3 = ggplotGrob(p3)
gg4 = ggplotGrob(Vgg)
## fill in the gtable
gt = gtable_add_grob(gt, gtable_filter(gg2, "panel"), 2, 1) # add barplot
gt = gtable_add_grob(gt, gtable_filter(gg1, "panel"), 2, 3) # add heatmap
gt = gtable_add_grob(gt, gtable_filter(gg1, "title"), 1, 3) #add title to plot
gt = gtable_add_grob(gt, gtable_filter(gg3, "panel"), 4, 3) # add scatterplot
gt = gtable_add_grob(gt, gtable_filter(gg2, "axis-b"), 3, 1) # y axis for barplot
gt = gtable_add_grob(gt, gtable_filter(gg3, "axis-l"), 4, 2) # y axis for scatter plot
gt = gtable_add_grob(gt, gtable_filter(gg1, "axis-l"), 2, 4) # variable names
#gt = gtable_add_grob(gt, gtable_filter(gg4, "guide-box"), 2, 5) # scale bar for continous variables
#plot
#grid.draw(gt)
plotList[[seaName]] <- gt
}
return(plotList)
}
#function for supplementary tables
stab_theme <- function(ft) {
ft |>
theme_alafoli() |>
autofit() |>
fontsize(size = 8, part = "header") |>
fontsize(size = 8, part = "body") |>
padding(padding.top = 3, padding.bottom = 3, part = "all") |>
align(align = "left", part = "body") |>
align(align = "left", part = "header") |>
bold(part = "header") |>
width(j = 1, width = 1.25) |>
width(j = 2, width = 3.5) |>
width(j = 3, width = 0.75) |>
line_spacing(space = 1, part = "all") |>
color(color = "black", part = "all")
}
# Modify according to directory
#filepath <-"../../"
filepath <- "~/Dropbox/Medizin/Forschung/Aktuelle Forschungsprojekte/CLL CPS1000/figures/github/"
load(paste0(filepath,"data/CPS1000_data.RData"))
Description of datasets:
- screen: processed DMSO-normalized ex vivo viability measurements on the first sample of each patient used in the analysis
- screen_allSamples: as above, but including all measurements
- drugs: list of drugs used in the screen
- patients: list of patients with corresponding diagnoses
- metadata: annotation of genetic aberrations
- treatments: treatment contexts for the longitudinal analysis of paired samples
- survival: overall survival and time-to-treatment data
- meth: Methylation-array data
- rna: RNA-seq data
- ddx3x (table, cps1000, literature): data on DDX3X mutations
#prepare data
drugs <- drugs |> as.data.frame()
as.factor(drugs$Pathway)
## [1] MYC Apoptosis PI3K/AKT/mTOR
## [4] EGFR Histone methylation JAK/STAT
## [7] DNA damage response PI3K/AKT/mTOR FGFR
## [10] DNA damage response ITK ALK
## [13] Chemotherapy MAPK Chemotherapy
## [16] Wnt MAPK ABL (BCR)
## [19] Chemotherapy PI3K/AKT/mTOR Histone methylation
## [22] Chemotherapy HGF Stress pathway
## [25] B-cell receptor PI3K/AKT/mTOR ABL (BCR)
## [28] TLR MAPK DNA damage response
## [31] MEN1 Chemotherapy MEN1
## [34] PKC PI3K/AKT/mTOR TLR
## [37] DNA damage response DNA damage response Stress pathway
## [40] B-cell receptor Bromodomain Cell cycle control
## [43] Histone deacetylation Cytoskeleton B-cell receptor
## [46] Cytokine receptor PI3K/AKT/mTOR DNA damage response
## [49] JAK/STAT MAPK MAPK
## [52] Nuclear export DNA damage response Histone methylation
## [55] Cytokine receptor JAK/STAT Cell cycle control
## [58] MAPK Apoptosis Apoptosis
## [61] Hedgehog PLK MAPK
## 28 Levels: ABL (BCR) ALK Apoptosis B-cell receptor ... Wnt
Fig2a <- ggplot(drugs, aes(x = factor(Pathway))) +
geom_bar(aes(fill = Type)) +
scale_fill_brewer(palette = "PuRd", direction = 1) +
labs(y="Number of drugs", x="", fill="Drug type") +
theme_figures_angle45_x +
scale_y_continuous(expand = c(0.01,0), breaks=c(1,2,3,4,5,6,7,8)) +
scale_x_discrete(expand = c(0.025,0)) +
theme(plot.margin = margin(t=1, b=1, r=1, l=1, unit="cm"))
Fig2a
ggsave(path=(paste0(filepath,"figures")), filename = "Fig2a_drug_compounds.svg")
## Saving 18 x 9 in image
#prepare data
drugs$Drug <- drugs$Drug |>
gsub("\\.", "-", x = _) |>
gsub("D4476", "D 4476", x = _) |>
gsub("IRAK1/4 Inhibitor I", "IRAK-1/4 Inhibitor I", x = _) |>
gsub("SB-203580", "SB 203580", x = _)
drugs$Drug <- sort(drugs$Drug)
drug_list <- drugs |>
dplyr::select(Drug, Target, Pathway) #add supplier
#drug overview
drugs$Drug
## [1] "10058-F4" "A-1210477" "A-674563"
## [4] "Afatinib" "AMI-1" "AZD1208"
## [7] "AZD6738" "AZD8055" "BGJ398"
## [10] "BML-277" "BMS-509744" "Ceritinib"
## [13] "Cisplatin" "Cobimetinib" "Cytarabine"
## [16] "D 4476" "Dabrafenib" "Dasatinib"
## [19] "Doxorubicine" "Duvelisib" "EPZ-6438"
## [22] "Fludarabine" "Foretinib" "Ganetespib"
## [25] "Ibrutinib" "Idelalisib" "Imatinib"
## [28] "IRAK-1/4 Inhibitor I" "JNK-IN-8" "KU-55933"
## [31] "Menin-MLL Inhibitor" "Methotrexate" "MI-503"
## [34] "Midostaurin" "MK-2206" "Motolimod"
## [37] "Nutlin-3a" "Olaparib" "Onalespib"
## [40] "ONO-4059" "OTX015" "Palbociclib"
## [43] "Panobinostat" "PF-3758309" "PRT062607"
## [46] "Quizartinib" "Rapamycin" "RO5963"
## [49] "Ruxolitinib" "SB 203580" "SCH772984"
## [52] "Selinexor" "Selisistat" "SGC 0946"
## [55] "Sunitinib" "Tofacitinib" "Tozasertib"
## [58] "Trametinib" "TW-37" "Venetoclax"
## [61] "Vismodegib" "Volasertib" "ZM 336372"
#create table
stab3 <- flextable(drug_list) # |> stab_theme()
ft_grob <- gen_grob(stab3, fit = "width", scaling = "min", hjust = 0)
STab3 <- ggplot() +
annotation_custom(ft_grob) +
theme_void() +
theme(plot.margin = margin(t = 1, r = 1, b = 1, l = 1, unit = "cm"))
ggsave(width=21, height=29.7, units = "cm", path=(paste0(filepath,"tables")), filename = "STab3_drugs_and_targets.svg")
#prepare data
patients <- patients |>
filter(diagnosis != "") |>
as.data.frame()
patients$cohort <- patients$cohort |>
str_replace("IC50", "IC50 + CPS1000") |>
as.factor()
Fig2c <- ggplot(patients, aes(x = forcats::fct_infreq(diagnosis))) +
geom_bar(aes(fill = cohort)) +
geom_text(stat = "count", aes(label = after_stat(count)), vjust = -0.5, size=2.5) +
scale_fill_brewer(palette = "Greens", direction =-1)+
labs(y="Number of patients", x="", fill="Drug screen")+
theme_figures_angle45_x +
scale_y_continuous(expand = c(0.02,0), breaks=seq(0,300,50), limits = c(0,300))+
scale_x_discrete(expand = c(0.04,0)) +
theme(plot.margin = margin(t=1, b=1, r=1, l=1, unit="cm"))
Fig2c
ggsave(path=(paste0(filepath,"figures")), filename = "Fig2c_disease_distribution.svg")
## Saving 18 x 9 in image
#prepare data
treatments$sample1 <- gsub("-[1234]$", "", treatments$sample1)
treatments$sample2 <- gsub("-[1234]$", "", treatments$sample2)
length(unique(treatments$patientID))
## [1] 110
#merge data
sample_date <- survival[,1:3]
merged_table <- merge(treatments, sample_date, by.x="sample1", by.y="sampleID", all.x=TRUE) |>
merge(patients[,c("patientID", "diagnosis")], by="patientID") |>
dplyr::rename(sampleDate1 = sampleDate)
#add date for 2. sampling and calculate time interval
merged_table <- merge(sample_date, merged_table, by.x="sampleID", by.y="sample2", ) |>
dplyr::rename(sample2 = sampleID, sampleDate2 = sampleDate) |>
dplyr::select(c(patientID, sample1, sample2, sampleDate1, sampleDate2, diagnosis)) |>
mutate(diff = sampleDate2-sampleDate1)
#plot
merged_table$diff <- as.numeric(merged_table$diff)
summary(merged_table$diff)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 7 224 515 578 884 1757
Fig2d <- ggplot(merged_table, aes(x="", y=diff)) +
geom_boxplot(color="black", linewidth=0.5, outliers = FALSE, width=0.4) +
geom_jitter(width = 0.1, alpha = 0.75, size=1, color="#74c476")+
theme_figures+
labs(x = "", y = "Days between samples")+
theme(axis.ticks.x = element_blank())+
scale_y_continuous(expand = c(0.02,0), breaks=c(seq(0,1750,500))) +
theme(plot.margin = margin(t=1, r=1, l=1, b=1, unit="cm"))
Fig2d
ggsave(path=(paste0(filepath,"figures")), filename = "Fig2d_interval_samples.svg")
## Saving 3 x 6 in image
#prepare data
#remove del5IgH
oncoprint_data <- merge(patients, metadata, by.x="patientID", by.y="Patient.ID") |>
filter(diagnosis.x == "CLL") |>
dplyr::select(!del5IgH)
#remove the first two columns (IGHV.status and Methylation_Cluster)
matrix_data <- as.matrix(oncoprint_data[, -c(1:13)])
rownames(matrix_data) <- oncoprint_data$patientID
genetic_variations <- t(matrix_data)
#exclude aberrations with less than 8 events
genetic_variations <- data.matrix(genetic_variations)
genetic_variations_num <- matrix(as.numeric(genetic_variations), ncol = 275)
## Warning in matrix(as.numeric(genetic_variations), ncol = 275): NAs introduced
## by coercion
rownames(genetic_variations_num) <- rownames(genetic_variations)
colnames(genetic_variations_num) <- colnames(genetic_variations)
genetic_variations_num <- as.data.frame(genetic_variations_num)
genetic_variations_num$count <- rowSums(!(is.na(genetic_variations_num) | genetic_variations_num == 0))
genetic_variations_num <- genetic_variations_num |>
filter(count >7)
#re-adjust matrix für heatmap
genetic_variations_num[genetic_variations_num==0]<-""
genetic_variations_num[is.na(genetic_variations_num)]<-""
genetic_variations_red <- dplyr::select(genetic_variations_num, !count)
genetic_variations_red <- as.matrix(genetic_variations_red)
#annotations
#annotation for IGHV status
ighv_anno <- oncoprint_data[,"IGHV.status"]
ighv_anno <- ighv_anno %>% replace(is.na(.), "unknown")
ighv_colors <- colorRampPalette(brewer.pal(9, "Set1"))(length(unique(ighv_anno)))
annotation_colors <- setNames(ighv_colors, unique(ighv_anno))
#annotation for Methylation
meth_anno <- oncoprint_data[,"Methylation_Cluster"]
meth_anno <- factor(meth_anno, levels = c("HP", "IP","LP", "unknown"))
meth_anno <- meth_anno %>% replace(is.na(.), "unknown")
meth_colors <- colorRampPalette(brewer.pal(8, "Set2"))(length(unique(meth_anno)))
annotation_colors_meth <- setNames(meth_colors, unique(meth_anno))
#annotation for center
center_anno <- oncoprint_data[,"project"]
center_anno <- factor(center_anno, levels = c("HD", "HD-DNA", "H68", "MIR34a", "FB", "NOV", "MDACC", "MUN", "HCL_HD_Andrulis", "OX", "BARC", "ZCH_2"))
center_colors <- colorRampPalette(brewer.pal(3, "Paired"))(length(unique(center_anno)))
#change center label
center_anno <- fct_recode(center_anno,
"Zürich" = "ZCH_2",
"Barcelona" = "BARC",
"Heidelberg" = "HD")
annotation_colors_center <- setNames(center_colors, unique(center_anno))
#change IGHV label
ighv_anno <- fct_recode(ighv_anno,
"mutated" = "M",
"unmutated" = "U")
annotation_colors <- setNames(ighv_colors, unique(ighv_anno))
ha <- HeatmapAnnotation(Center = center_anno, IGHV = ighv_anno, Methylation = meth_anno, annotation_name_gp = gpar(fontsize = 8), col = list(Center = annotation_colors_center, IGHV = annotation_colors, Methylation = annotation_colors_meth), annotation_legend_param = list(
Center = list(
title_gp = gpar(fontsize = 8, fontface = "bold"),
labels_gp = gpar(fontsize = 8)
),
IGHV = list(
title_gp = gpar(fontsize = 8, fontface = "bold"),
labels_gp = gpar(fontsize = 8)
),
Methylation = list(
title_gp = gpar(fontsize = 8, fontface = "bold"),
labels_gp = gpar(fontsize = 8)
)))
heatmap_legend_param = list(title="Alteration", at = c("1"), title_gp = gpar(fontsize = 8), labels_gp = gpar(fontsize = 8))
#create the oncoprint
oncoprint <- oncoPrint(
genetic_variations_red,
alter_fun = list(
background = function(x, y, w, h) {
grid.rect(x, y, w, h, gp = gpar(fill = "#F8F8F8", col = "white"))
},
"1" = function(x, y, w, h) grid.rect(x, y, w, h, gp = gpar(fill = "black", col = "white")), col = "white"),
col = c("1" = "black"), na_col = "white",
remove_empty_rows = FALSE,
remove_empty_columns = FALSE,
row_names_gp = gpar(fontsize = 8),
column_names_gp = gpar(fontsize = 8),
row_names_side = "left", pct_side = "right", pct_digits = 1,
right_annotation = rowAnnotation(row_barplot = anno_oncoprint_barplot(c("1"), border = FALSE, height = unit(4, "cm"), axis_param = list(side = "bottom", labels_rot = 0))),
pct_gp = gpar(fontsize = 8), top_annotation = ha, heatmap_legend_param = heatmap_legend_param)
## All mutation types: 1.
## `alter_fun` is assumed vectorizable. If it does not generate correct
## plot, please set `alter_fun_is_vectorized = FALSE` in `oncoPrint()`.
draw(oncoprint, heatmap_legend_side = "left", annotation_legend_side = "right", show_heatmap_legend = FALSE)
Fig2e <- grid.grabExpr(draw(oncoprint, heatmap_legend_side = "left", annotation_legend_side = "right", show_heatmap_legend = FALSE))
ggsave(Fig2e, path=(paste0(filepath,"figures")), filename = "Fig2e_CLL_genetic_landscape.svg")
## Saving 12 x 8 in image
## Warning: Removed 790 rows containing non-finite outside the scale range
## (`stat_density()`).
## Warning: Removed 1161 rows containing non-finite outside the scale range
## (`stat_density()`).
## Saving 18 x 6 in image
## Warning: Removed 790 rows containing non-finite outside the scale range
## (`stat_density()`).
## Removed 1161 rows containing non-finite outside the scale range
## (`stat_density()`).
## Warning: Removed 790 rows containing non-finite outside the scale range
## (`stat_density()`).
## Warning: Removed 1161 rows containing non-finite outside the scale range
## (`stat_density()`).
#cap supravital viability results (cut-off <1.2)
screen_long$viability[screen_long$viability>1.2] <- 1.2
screen_long$viability[screen_long$viability<0] <- 0
#show diseases separately with at least 10 samples
pat_nr_per_disease <- patients |>
group_by(diagnosis) |>
summarize(n_pat = n()) |>
arrange(desc(n_pat))
diagnoses_show <- pat_nr_per_disease |>
filter(n_pat >=10) |>
pull(diagnosis)
df_tsne <- patients[,c("patientID", "diagnosis")] |>
merge(screen, by="patientID") |>
dplyr::select(-patientID, -sampleID) |>
mutate(diagnosis= case_when(diagnosis %in% diagnoses_show ~ diagnosis,
TRUE ~ "other"))
#split dataframe in measures and subsetting
df_meas <- df_tsne[, !names(df_tsne) %in% "diagnosis"]
df_subset <- df_tsne[, names(df_tsne) %in% "diagnosis"]
#define order
order <- c("AML", "B-ALL", "B-PLL", "CLL", "MCL", "T-ALL", "T-PLL", "other")
df_subset <- factor(df_tsne$diagnosis, levels = order)
#t-sne
set.seed(12)
tsne_results <- Rtsne(df_meas, perplexity=25, check_duplicates = FALSE)
#plot
tsne_plot <- data.frame(
x = tsne_results$Y[,1],
y = tsne_results$Y[,2],
diagnosis = df_subset
)
Fig3a <- ggplot(tsne_plot) +
geom_point(aes(x=x, y=y, color=diagnosis), size=2, alpha=0.8) +
theme_figures+
labs(color="Diagnosis")+
scale_color_manual(values=colors_diag)+
theme(plot.margin = margin(t=2, b=2, r=1, l=1, unit="cm"))
Fig3a
ggsave(path=(paste0(filepath,"figures")), filename = "Fig3a_tsne.svg")
## Saving 6 x 6 in image
#prepare data
screen_long_topdiag <- screen_long |>
mutate(diagnosis = case_when(diagnosis %in% diagnoses_show ~ diagnosis,
TRUE ~ "other"))
#calculate mean viability
screen_means <- screen_long_topdiag |>
group_by(patientID, diagnosis, drug) |>
summarize(mean_viab = mean(viability))
## `summarise()` has grouped output by 'patientID', 'diagnosis'. You can override
## using the `.groups` argument.
screen_means
## # A tibble: 28,665 × 4
## # Groups: patientID, diagnosis [455]
## patientID diagnosis drug mean_viab
## <chr> <chr> <chr> <dbl>
## 1 P0001 CLL 10058-F4 0.998
## 2 P0001 CLL A-1210477 0.983
## 3 P0001 CLL A-674563 0.898
## 4 P0001 CLL AMI-1 1.00
## 5 P0001 CLL AZD1208 0.871
## 6 P0001 CLL AZD6738 0.980
## 7 P0001 CLL AZD8055 0.912
## 8 P0001 CLL Afatinib 0.863
## 9 P0001 CLL BGJ398 1.04
## 10 P0001 CLL BML-277 1.06
## # ℹ 28,655 more rows
#merge with metadata for IGHV status in CLL, drop CLL without IGHV annotation
ighv <- metadata[, names(metadata) %in% c("Patient.ID", "IGHV.status")]
screen_means_ighv <- merge(screen_means, ighv, by.x = "patientID", by.y= "Patient.ID") |>
mutate(diagnosis = case_when(IGHV.status == "U" ~ "U-CLL",
IGHV.status == "M" ~ "M-CLL",
TRUE ~ diagnosis)) |>
filter (diagnosis != "CLL") |>
dplyr::select(-IGHV.status)
#prepare for heatmap
df <- screen_means_ighv |>
dplyr::select(-diagnosis) |>
pivot_wider(names_from = patientID, values_from = mean_viab)
df_use <- df |>
dplyr::select(-drug) |>
colnames()
df_subset <- df |>
dplyr::select(-drug)
#diagnoses
df_diagnoses <- screen_means_ighv |>
filter(drug == "10058-F4") |>
dplyr::select(patientID, diagnosis)
#add pathways
df_pathway <- drugs[, names(drugs) %in% c("Drug", "Pathway_mod")]
df_combined <- merge(df_pathway, df, , by.x = "Drug", by.y = "drug")
#order pathways and diagnoses
df_combined$Pathway_mod <- factor(df_combined$Pathway_mod,
levels = c("AKT/mTOR", "B-cell receptor", "Bromodomain/PLK", "Chemotherapy",
"DNA damage response", "JAK/STAT", "MAPK", "PI3K", "Stress pathway", "other"))
df_diagnoses$diagnosis <- factor(df_diagnoses$diagnosis,
levels = c("AML", "B-ALL", "B-PLL", "M-CLL", "MCL", "T-ALL", "T-PLL", "U-CLL", "other"))
#scale
x <- data.frame(df_subset)
y <- data.frame(t(scale(t(x))))
#create matrix
rownames(y) = df$drug
colnames(y) = colnames(df_subset)
z <- as.matrix(y)
#annotations
color_fun_reversed <- colorRamp2(c(4, median(z), -4), c("red", "white", "blue"))
ha = HeatmapAnnotation(
Diagnosis = df_diagnoses$diagnosis,
annotation_name_gp = gpar(fontsize = 8),
col = list(Diagnosis = colors_diag_IGHV),
annotation_legend_param = list(
Diagnosis = list(
title_gp = gpar(fontsize = 8, fontface = "bold"),
labels_gp = gpar(fontsize = 8),
nrow = 2
)
)
)
pway <- as.data.frame(df_combined[,1:2])
pway_ordered = pway[match(rownames(z), pway$Drug), ]
ha2 = rowAnnotation(
Pathway = pway_ordered$Pathway_mod,
annotation_name_gp = gpar(fontsize = 8),
col = list(Pathway = colors_pathways_mod),
annotation_legend_param = list(
Pathway = list(
title_gp = gpar(fontsize = 8, fontface = "bold"),
labels_gp = gpar(fontsize = 8),
nrow = 2
)
)
)
#heatmap
h1 <- Heatmap(z, name = "Z-score", show_row_names = TRUE, show_column_names=FALSE, row_names_gp = gpar(fontsize = 8), width = ncol(z)*unit(0.25, "mm"), height = nrow(z)*unit(3.5, "mm"), col = color_fun_reversed, top_annotation = ha, left_annotation = ha2, show_column_dend = FALSE, row_km = 4, row_title = NULL,
show_parent_dend_line = FALSE, heatmap_legend_param = list(direction = "horizontal", title_gp = gpar(fontsize = 8, fontface = "bold"), labels_gp = gpar(fontsize = 8)))
set.seed(1234)
#set spacing to legend
ht_opt(HEATMAP_LEGEND_PADDING = unit(1.5, "cm"))
Fig3b <- grid.grabExpr(draw(h1, merge_legends = TRUE, heatmap_legend_side = "bottom"))
Fig3b
## gTree[GRID.gTree.2263]
# Reset options
ht_opt(RESET = TRUE)
#save?
#create diagnosis-specific means
screen_mean_diag <- screen_long |>
group_by(diagnosis, drug) |>
summarize(mean_diag = mean(viability)) |>
filter(diagnosis %in% diagnoses_show) |>
pivot_wider(names_from = diagnosis, values_from = mean_diag)
## `summarise()` has grouped output by 'diagnosis'. You can override using the
## `.groups` argument.
#create heatmap
w <- screen_mean_diag[, names(screen_mean_diag) != "drug"]
rownames(w) = screen_mean_diag$drug
## Warning: Setting row names on a tibble is deprecated.
w <- as.matrix(t(w))
set.seed(123)
ha = rowAnnotation(foo = anno_text(rownames(w), gp = gpar(fontsize=8), just="right", offset=1))
## Warning: `offset` is deprecated, use `location` instead.
column_ha = HeatmapAnnotation(foo = anno_text(colnames(w), gp = gpar(fontsize=8), just = "left", offset =0))
## Warning: `offset` is deprecated, use `location` instead.
h2 <- Heatmap(w, name = "Mean viability", left_annotation = ha,
top_annotation = column_ha,
show_row_names = FALSE, show_column_names=FALSE, width = ncol(z)*unit(0.45, "mm"), height = nrow(z)*unit(0.4, "mm"), show_row_dend=TRUE, row_dend_side = "left", show_column_dend=FALSE,
row_dend_width = unit(7, "mm"), heatmap_legend_param = list(title_gp = gpar(fontsize = 8, fontface = "bold"), labels_gp = gpar(fontsize = 8)))
Fig3c_top <- draw(h2, heatmap_legend_side = "right", padding = unit(c(2, 2, 0, 2), "mm"))
#extract heatmap order
clustered_col_indices <- column_order(h2)
## Warning: The heatmap has not been initialized. You might have different results
## if you repeatedly execute this function, e.g. when row_km/column_km was
## set. It is more suggested to do as `ht = draw(ht); column_order(ht)`.
clustered_col_names <- colnames(w)[clustered_col_indices]
#create patient-specific means
screen_mean_pat <- screen_long |>
filter(diagnosis %in% diagnoses_show) |>
group_by(drug, patientID) |>
summarize(mean_pat = mean(viability)) |>
merge(drugs[,c("Drug", "Category")], by.x="drug", by.y="Drug") |>
mutate(drug = factor(drug, levels = clustered_col_names)) |>
arrange(drug)
## `summarise()` has grouped output by 'drug'. You can override using the
## `.groups` argument.
#add boxplots
Fig3c_bottom <- ggplot(screen_mean_pat, aes(x=drug, y=mean_pat, color=Category))+
geom_boxplot(alpha=0.6, outlier.shape=NA) +
geom_jitter(alpha=0.1, size=0.2, width = 0.15)+
theme_figures+
theme(axis.text.x = element_text(color="black", size=8, angle=90, hjust=1, vjust=0.5),
legend.position = "bottom",
panel.spacing = unit(0, "mm"),
plot.margin = margin(t=0, b=2, r=33, l=21, unit = "mm"))+
scale_y_continuous(name ="Mean viability", breaks=seq(0,1.2, 0.2), limits=c(0,1.2)) +
scale_x_discrete(name="")+
scale_color_manual(values=colors_drug_type, name = "Drug type", labels=c("Chemotherapy", "Kinase inhibitor", "Other"))
Fig3c_bottom
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
Fig3c_bottom_aligned <- ggdraw(Fig3c_bottom) +
theme(plot.margin = margin(0, 0, 0, 0))
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
Fig3c <- plot_grid(grid.grabExpr(draw(h2, heatmap_legend_side = "right")), Fig3c_bottom_aligned, ncol=1, rel_heights = c(0.5, 0.5))
#save?
#create plots
aml <- create_boxplot(screen_means_ighv, "AML", colors_diag_IGHV, "AML")
ball <- create_boxplot(screen_means_ighv, "B-ALL", colors_diag_IGHV, "B-ALL")
tall <- create_boxplot(screen_means_ighv, "T-ALL", colors_diag_IGHV, "T-ALL")
SFig3 <- plot_grid(aml, ball, tall, NULL, NULL, ncol = 5, nrow = 1)
## Warning: Removed 23 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 31 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 21 rows containing missing values or values outside the scale range
## (`geom_point()`).
SFig3
ggsave(path=(paste0(filepath,"figures")), filename = "SFig3_viab_leucemia.svg", width=42, height=20, units = "cm")
bpll <- create_boxplot(screen_means_ighv, "B-PLL", colors_diag_IGHV, "B-PLL")
mcll <- create_boxplot(screen_means_ighv, "M-CLL", colors_diag_IGHV, "M-CLL")
mcl <- create_boxplot(screen_means_ighv, "MCL", colors_diag_IGHV, "MCL")
ucll <- create_boxplot(screen_means_ighv, "U-CLL", colors_diag_IGHV, "U-CLL")
tpll <- create_boxplot(screen_means_ighv, "T-PLL", colors_diag_IGHV, "T-PLL")
SFig4 <- plot_grid(bpll, mcll, mcl, ucll, tpll, ncol = 5, nrow = 1)
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 51 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 22 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
SFig4
ggsave(path=(paste0(filepath,"figures")), filename = "SFig4_viab_lymphoma.svg", width=42, height=20, units = "cm")
#prepare data
diagnoses_show_ighv <- c(diagnoses_show, "U-CLL", "M-CLL")
diagnoses_show_ighv <- diagnoses_show_ighv[diagnoses_show_ighv != "CLL"]
viabTab.mean <- screen_means_ighv |>
filter(diagnosis %in% diagnoses_show_ighv)
#create p-values
allDiag <- unique(viabTab.mean$diagnosis)
pTabAll <- lapply(allDiag, function(diagBase) {
#calculate p value matrix
pTab <- lapply(allDiag[allDiag != diagBase], function(diagCompare) {
testTab <- filter(viabTab.mean, diagnosis %in% c(diagBase, diagCompare)) |>
mutate(diagnosis = factor(diagnosis, levels = c(diagBase,diagCompare)))
res <- group_by(testTab, drug) |> do(tTest(.$mean_viab, .$diagnosis)) |>
mutate(diagCompare = diagCompare, diagBase = diagBase)
res
}) |> bind_rows() |> ungroup() |> mutate(p.adj = p.adjust(p, method = "BH"))
}) |> bind_rows()
#prepare data table for plot
plotTab <- pTabAll |> mutate(p = ifelse(p < 1e-12, 1e-12, p)) |>
mutate(pSign = -log10(p)*sign(diff)) |>
mutate(pSign = ifelse(p.adj < 0.1, pSign, 0)) |>
ungroup()
#order drugs by their association similarity (hierarchical clustering)
pMat <- mutate(plotTab, diagPair = paste0(diagBase,"_",diagCompare)) |>
dplyr::select(drug, pSign, diagPair) |>
spread(key = diagPair, value = pSign) |> data.frame() |>
column_to_rownames("drug") |> as.matrix()
#cut
set.seed(123)
nClust = 9
hcRes <- hclust(dist(pMat), method = "ward.D2")
drugOrder <- rownames(pMat)[hcRes$order]
clustMap <- cutree(hcRes,nClust)
diagOrder <- c("U-CLL", "M-CLL","MCL", "B-PLL", "T-PLL", "B-ALL", "T-ALL","AML")
plotTab <- mutate(plotTab, drug = factor(drug, levels = drugOrder),
diagBase = factor(diagBase, levels= diagOrder),
diagCompare = factor(diagCompare, levels = diagOrder)) |>
arrange(drug) |> mutate(drugClust = paste0("",nClust-clustMap[as.character(drug)]+1))
#color strips in y-direction
strip <- strip_themed(background_y = elem_list_rect(fill = "white"))
#add pathway annotation
plotTab <- merge(plotTab, drugs[,c("Drug", "Pathway_mod")], by.x="drug", by.y="Drug")
#plot
main_plot <- ggplot(plotTab, aes(x=diagCompare, y = drug, fill = pSign)) +
geom_tile(size = 0.3, color = "white") +
facet_grid2(rows = vars(drugClust), cols = vars(diagBase), scales = "free", space = "free_y", strip = strip) +
scale_fill_gradient2(
high = "blue", mid = "white", low = "red", midpoint = 0)+
theme_figures_facet_angle60_x+
labs(fill = "**-log<sub>10</sub>*P*<br>with direction**") +
theme(legend.title = ggtext::element_markdown())+
ylab("") + xlab("")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
main_plot
#add pathway annotation
anno_plot <- ggplot(plotTab, aes(x = diagCompare, y = drug, fill = Pathway_mod)) +
geom_tile(size = 0.3) +
facet_grid(rows = vars(drugClust), scales = "free", space = "free_y") +
scale_fill_manual(values = colors_pathways_mod,
name = "Drug Class")+
theme_minimal() +
theme(axis.text = element_blank(),
axis.title = element_blank(),
panel.grid = element_blank(),
panel.border = element_rect(linewidth = 0.75),
panel.background = element_blank(),
plot.background = element_blank(),
strip.text.y = element_blank(),
panel.spacing = unit(0.15, "lines"),
legend.position = "none")
Fig3d <- ggdraw() +
draw_plot(main_plot, x=0,y=0,width=1,height=1)+
draw_plot(anno_plot,x=0.8612,y=0.075,width=0.03,height=0.898) &
theme(plot.margin = margin(t=1, unit="cm"))
ggsave(path=(paste0(filepath,"figures")), filename = "Fig3d_pvalue_heatmap.svg", width=42, height=30, units = "cm")
#prepare data
#rank drugs
rankTab <- screen_long |>
group_by(patientID, drug, diagnosis) |>
summarise(meanViab = mean(viability)) |>
dplyr::ungroup()
## `summarise()` has grouped output by 'patientID', 'drug'. You can override using
## the `.groups` argument.
medTab <- screen_long |>
group_by(drug) |>
summarise(medVal = median(viability))
rankTab <- left_join(rankTab, medTab, by = "drug") |>
mutate(score = meanViab - medVal)
#make rank plot
rank_score_tab <- rankTab |>
group_by(patientID, diagnosis) |>
arrange((score)) |>
mutate(rank_score=row_number(), l2fc = log2(meanViab)-log2(medVal)) |>
ungroup()
#show ranks for AML with venetoclax and cytarabine
aml_cyt <- rank_score_tab |>
filter(diagnosis %in% c("AML")) |>
mutate(drug_col = ifelse(drug %in% c("Cytarabine"), drug, "other")) |>
ggplot(aes(x=rank_score, y=score, alpha=drug_col, group=patientID))+
geom_hline(yintercept = 0, color="grey70")+
geom_line(alpha=0.25, color="black")+
geom_point(aes(alpha=drug_col, fill=drug_col, color=drug_col), size=1.5, shape=21)+
scale_alpha_manual(values=c(0.75,0))+
scale_fill_manual(name="Drug", breaks=c("Cytarabine"), values=c("#E31A1C", ""))+
scale_color_manual(name="Drug", breaks=c("Cytarabine"), values=c("#E31A1C", ""))+
labs(title="Cytarabine", y="Score", x="Drug rank")+
theme_bw()+
theme(axis.text = element_text(color="black", size=8),
legend.position = "none", axis.title = element_text(size=8), plot.title = element_text(hjust=0.5, size=8),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "black", linewidth = 1, fill = NA))
aml_ven <- rank_score_tab |>
filter(diagnosis %in% c("AML")) |>
mutate(drug_col = ifelse(drug %in% c("Venetoclax"), drug, "other")) |>
ggplot(aes(x=rank_score, y=score, alpha=drug_col, group=patientID))+
geom_hline(yintercept = 0, color="grey70")+
geom_line(alpha=0.25, color="black")+
geom_point(aes(alpha=drug_col, fill=drug_col, color=drug_col), size=1.5, shape=21)+
scale_alpha_manual(values=c(0, 0.75))+
scale_fill_manual(name="Drug", breaks=c("Venetoclax"), values=c("#E31A1C", ""))+
scale_color_manual(name="Drug", breaks=c("Venetoclax"), values=c("#E31A1C", ""))+
labs(title="Venetoclax", y="Score", x="Drug rank")+
theme_bw()+
theme(axis.text = element_text(color="black", size=8),
legend.position = "none", axis.title = element_text(size=8), plot.title = element_text(hjust=0.5, size=8),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "black", linewidth = 1, fill = NA))
SFig8c <- aml_cyt + aml_ven + plot_annotation(title = "AML", theme = theme(plot.title = element_text(hjust=0.5, size=8, face="bold"))) + theme(plot.margin = margin(t=1, unit = "cm"))
SFig8c
#combine with IGHV annotation
cll_meta <- metadata[, c("Patient.ID", "diagnosis", "IGHV.status")] |>
filter(diagnosis == "CLL")
rank_score_tab_mod <- merge(rank_score_tab, cll_meta[,c("Patient.ID", "IGHV.status")], by.x="patientID", by.y="Patient.ID", all.x = TRUE) |>
mutate(diagnosis = case_when(IGHV.status == "U" ~ "U-CLL",
IGHV.status == "M" ~ "M-CLL",
TRUE ~ diagnosis)) |>
filter(diagnosis != "CLL")
drug_ranking_diag <- rank_score_tab_mod |>
group_by(drug, diagnosis) |>
summarize(median_score = median(score), score_sd = sd(score)) |>
arrange(median_score) |>
group_by(diagnosis) |>
mutate(rank_median = row_number())
## `summarise()` has grouped output by 'drug'. You can override using the
## `.groups` argument.
# plot: aml
drug_order_aml <- drug_ranking_diag |>
filter(diagnosis == "AML") |>
arrange(median_score) |>
pull(drug)
drug_ranking_diag$drug <- factor(drug_ranking_diag$drug, levels=drug_order_aml)
rank_aml <- drug_ranking_diag |>
filter(diagnosis == "AML") |>
mutate(direction = sign(median_score)) |>
ggplot(aes(x=drug, y=median_score, fill=diagnosis))+
geom_col(alpha=0.9) +
geom_errorbar(aes(ymin = median_score-score_sd, ymax = median_score+score_sd),
color="black", alpha=0.75, width=0.5, linewidth=0.5)+
facet_wrap(~diagnosis) +
labs(title="", y="Median score", x="") +
theme_bw() +
guides(color="none")+
scale_fill_manual(values=colors_diag)+
theme(legend.position = "none",
axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5, color="black", size=8),
panel.grid = element_line(colour = "white"),
axis.text.y = element_text(color="black", size=8),
axis.title.y = element_text(size=8),
axis.title.x = element_text(size=8),
panel.border = element_rect(color = "black", linewidth = 1, fill = NA),
strip.background = element_rect(color = "black", linewidth = 1, fill = "grey90"),
plot.title = element_text(hjust=0.5, size=8, face="bold"),
strip.text = element_text(size = 8),
plot.margin = margin(l = 0.5, r = 1, unit = "cm"))
rank_aml
# plot: u-cll
drug_order_ucll <- drug_ranking_diag |>
filter(diagnosis == "U-CLL") |>
arrange(median_score) |>
pull(drug)
drug_ranking_diag$drug <- factor(drug_ranking_diag$drug, levels=drug_order_ucll)
rank_ucll <- drug_ranking_diag |>
filter(diagnosis == "U-CLL") |>
mutate(direction = sign(median_score)) |>
ggplot(aes(x=drug, y=median_score, fill=diagnosis))+
geom_col(alpha=0.9) +
geom_errorbar(aes(ymin = median_score-score_sd, ymax = median_score+score_sd),
color="black", alpha=0.75, width=0.5, linewidth=0.5)+
facet_wrap(~diagnosis) +
labs(title="", y="", x="") +
theme_bw() +
guides(color="none")+
scale_fill_manual(values=colors_diag_IGHV)+
theme(legend.position = "none",
axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5, color="black", size=8),
panel.grid = element_line(colour = "white"),
axis.text.y = element_text(color="black", size=8),
axis.title.y = element_text(size=8),
axis.title.x = element_text(size=8),
panel.border = element_rect(color = "black", linewidth = 1, fill = NA),
strip.background = element_rect(color = "black", linewidth = 1, fill = "grey90"),
plot.title = element_text(hjust=0.5, size=8, face="bold"),
strip.text = element_text(size = 8),
plot.margin = margin(l = 0.5, r = 1, unit = "cm"))
# plot: m-cll
drug_order_mcll <- drug_ranking_diag |>
filter(diagnosis == "M-CLL") |>
arrange(median_score) |>
pull(drug)
drug_ranking_diag$drug <- factor(drug_ranking_diag$drug, levels=drug_order_mcll)
rank_mcll <- drug_ranking_diag |>
filter(diagnosis == "M-CLL") |>
mutate(direction = sign(median_score)) |>
ggplot(aes(x=drug, y=median_score, fill=diagnosis))+
geom_col(alpha=0.9) +
geom_errorbar(aes(ymin = median_score-score_sd, ymax = median_score+score_sd),
color="black", alpha=0.75, width=0.5, linewidth=0.5)+
facet_wrap(~diagnosis) +
labs(title="", y="", x="") +
theme_bw() +
guides(color="none")+
scale_fill_manual(values=colors_diag_IGHV)+
theme(legend.position = "none",
axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5, color="black", size=8),
panel.grid = element_line(colour = "white"),
axis.text.y = element_text(color="black", size=8),
axis.title.y = element_text(size=8),
axis.title.x = element_text(size=8),
panel.border = element_rect(color = "black", linewidth = 1, fill = NA),
strip.background = element_rect(color = "black", linewidth = 1, fill = "grey90"),
plot.title = element_text(hjust=0.5, size=8, face="bold"),
strip.text = element_text(size = 8),
plot.margin = margin(l = 0.5, r = 1, unit = "cm"))
# plot: b-all
drug_order_ball <- drug_ranking_diag |>
filter(diagnosis == "B-ALL") |>
arrange(median_score) |>
pull(drug)
drug_ranking_diag$drug <- factor(drug_ranking_diag$drug, levels=drug_order_ball)
rank_ball <- drug_ranking_diag |>
filter(diagnosis == "B-ALL") |>
mutate(direction = sign(median_score)) |>
ggplot(aes(x=drug, y=median_score, fill=diagnosis))+
geom_col(alpha=0.9) +
geom_errorbar(aes(ymin = median_score-score_sd, ymax = median_score+score_sd),
color="black", alpha=0.75, width=0.5, linewidth=0.5)+
facet_wrap(~diagnosis) +
labs(title="", y="", x="") +
theme_bw() +
guides(color="none")+
scale_fill_manual(values=colors_diag)+
theme(legend.position = "none",
axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5, color="black", size=8),
panel.grid = element_line(colour = "white"),
axis.text.y = element_text(color="black", size=8),
axis.title.y = element_text(size=8),
axis.title.x = element_text(size=8),
panel.border = element_rect(color = "black", linewidth = 1, fill = NA),
strip.background = element_rect(color = "black", linewidth = 1, fill = "grey90"),
plot.title = element_text(hjust=0.5, size=8, face="bold"),
strip.text = element_text(size = 8),
plot.margin = margin(l = 0.5, r = 1, unit = "cm"))
# plot: t-all
drug_order_tall <- drug_ranking_diag |>
filter(diagnosis == "T-ALL") |>
arrange(median_score) |>
pull(drug)
drug_ranking_diag$drug <- factor(drug_ranking_diag$drug, levels=drug_order_tall)
rank_tall <- drug_ranking_diag |>
filter(diagnosis == "T-ALL") |>
mutate(direction = sign(median_score)) |>
ggplot(aes(x=drug, y=median_score, fill=diagnosis))+
geom_col(alpha=0.9) +
geom_errorbar(aes(ymin = median_score-score_sd, ymax = median_score+score_sd),
color="black", alpha=0.75, width=0.5, linewidth=0.5)+
facet_wrap(~diagnosis) +
labs(title="", y="Median score", x="") +
theme_bw() +
guides(color="none")+
scale_fill_manual(values=colors_diag)+
theme(legend.position = "none",
axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5, color="black", size=8),
panel.grid = element_line(colour = "white"),
axis.text.y = element_text(color="black", size=8),
axis.title.y = element_text(size=8),
axis.title.x = element_text(size=8),
panel.border = element_rect(color = "black", linewidth = 1, fill = NA),
strip.background = element_rect(color = "black", linewidth = 1, fill = "grey90"),
plot.title = element_text(hjust=0.5, size=8, face="bold"),
strip.text = element_text(size = 8),
plot.margin = margin(l = 0.5, r = 1, unit = "cm"))
# plot: mcl
drug_order_mcl <- drug_ranking_diag |>
filter(diagnosis == "MCL") |>
arrange(median_score) |>
pull(drug)
drug_ranking_diag$drug <- factor(drug_ranking_diag$drug, levels=drug_order_mcl)
rank_mcl <- drug_ranking_diag |>
filter(diagnosis == "MCL") |>
mutate(direction = sign(median_score)) |>
ggplot(aes(x=drug, y=median_score, fill=diagnosis))+
geom_col(alpha=0.9) +
geom_errorbar(aes(ymin = median_score-score_sd, ymax = median_score+score_sd),
color="black", alpha=0.75, width=0.5, linewidth=0.5)+
facet_wrap(~diagnosis) +
labs(title="", y="Median score", x="") +
theme_bw() +
guides(color="none")+
scale_fill_manual(values=colors_diag)+
theme(legend.position = "none",
axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5, color="black", size=8),
panel.grid = element_line(colour = "white"),
axis.text.y = element_text(color="black", size=8),
axis.title.y = element_text(size=8),
axis.title.x = element_text(size=8),
panel.border = element_rect(color = "black", linewidth = 1, fill = NA),
strip.background = element_rect(color = "black", linewidth = 1, fill = "grey90"),
plot.title = element_text(hjust=0.5, size=8, face="bold"),
strip.text = element_text(size = 8),
plot.margin = margin(l = 0.5, r = 1, unit = "cm"))
# plot: tpll
drug_order_tpll <- drug_ranking_diag |>
filter(diagnosis == "T-PLL") |>
arrange(median_score) |>
pull(drug)
drug_ranking_diag$drug <- factor(drug_ranking_diag$drug, levels=drug_order_tpll)
rank_tpll <- drug_ranking_diag |>
filter(diagnosis == "T-PLL") |>
mutate(direction = sign(median_score)) |>
ggplot(aes(x=drug, y=median_score, fill=diagnosis))+
geom_col(alpha=0.9) +
geom_errorbar(aes(ymin = median_score-score_sd, ymax = median_score+score_sd),
color="black", alpha=0.75, width=0.5, linewidth=0.5)+
facet_wrap(~diagnosis) +
labs(title="", y="", x="") +
theme_bw() +
guides(color="none")+
scale_fill_manual(values=colors_diag)+
theme(legend.position = "none",
axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5, color="black", size=8),
panel.grid = element_line(colour = "white"),
axis.text.y = element_text(color="black", size=8),
axis.title.y = element_text(size=8),
axis.title.x = element_text(size=8),
panel.border = element_rect(color = "black", linewidth = 1, fill = NA),
strip.background = element_rect(color = "black", linewidth = 1, fill = "grey90"),
plot.title = element_text(hjust=0.5, size=8, face="bold"),
strip.text = element_text(size = 8),
plot.margin = margin(l = 0.5, r = 1, unit = "cm"))
# plot: bpll
drug_order_bpll <- drug_ranking_diag |>
filter(diagnosis == "B-PLL") |>
arrange(median_score) |>
pull(drug)
drug_ranking_diag$drug <- factor(drug_ranking_diag$drug, levels=drug_order_bpll)
rank_bpll <- drug_ranking_diag |>
filter(diagnosis == "B-PLL") |>
mutate(direction = sign(median_score)) |>
ggplot(aes(x=drug, y=median_score, fill=diagnosis))+
geom_col(alpha=0.9) +
geom_errorbar(aes(ymin = median_score-score_sd, ymax = median_score+score_sd),
color="black", alpha=0.75, width=0.5, linewidth=0.5)+
facet_wrap(~diagnosis) +
labs(title="", y="Median score", x="") +
theme_bw() +
guides(color="none")+
scale_fill_manual(values=colors_diag)+
theme(legend.position = "none",
axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5, color="black", size=8),
panel.grid = element_line(colour = "white"),
axis.text.y = element_text(color="black", size=8),
axis.title.y = element_text(size=8),
axis.title.x = element_text(size=8),
panel.border = element_rect(color = "black", linewidth = 1, fill = NA),
strip.background = element_rect(color = "black", linewidth = 1, fill = "grey90"),
plot.title = element_text(hjust=0.5, size=8, face="bold"),
strip.text = element_text(size = 8),
plot.margin = margin(l = 0.5, r = 1, unit = "cm"))
SFig8b <- (rank_aml+plot_spacer()+rank_ball+plot_layout(widths = c(1, -0.1 ,1)))/(rank_bpll+plot_spacer()+rank_mcll+plot_layout(widths = c(1, -0.1 ,1)))/(rank_mcl+plot_spacer()+rank_ucll+plot_layout(widths = c(1, -0.1 ,1)))/(rank_tall+plot_spacer()+rank_tpll+plot_layout(widths = c(1, -0.1 ,1))) + theme(plot.margin = margin(t=1, unit = "cm"))
#calculate global ranks
#show all diseases
global_rank_tab <- rank_score_tab_mod |>
group_by(patientID) |>
mutate(global_score = sum(score)) |>
arrange(global_score) |>
group_by(drug) |>
mutate(global_rank = row_number()) |>
filter(drug == "Ibrutinib") |>
#calculate median global rank per diagnosis
group_by(diagnosis) |>
mutate(global_score_median_diag = median(global_score),
global_rank_median_diag = median(global_rank))
#plot
diagSelected <- c("AML", "B-ALL", "T-ALL", "T-PLL", "B-PLL", "MCL", "U-CLL", "M-CLL")
global_rank_tab <- global_rank_tab |>
mutate(diag_col = ifelse(diagnosis %in% diagSelected, diagnosis, "other"))
global_rank_tab$diag_col <- factor(global_rank_tab$diag_col, levels = sort(diagSelected))
# Calculate the line for all data
reference_line <- global_rank_tab |>
arrange(global_rank) |>
dplyr::select(global_rank, global_score)
## Adding missing grouping variables: `diagnosis`
global_rank_tab <- global_rank_tab |>
filter(!is.na(diag_col))
score_ranking <- global_rank_tab |>
arrange(global_score_median_diag) |>
distinct(diag_col) |>
pull(diag_col)
global_rank_tab$diag_col <- factor(global_rank_tab$diag_col, levels = score_ranking)
# Now plot with facets
all_diag <- global_rank_tab |>
ggplot(aes(x=global_rank, y=global_score)) +
geom_hline(data = global_rank_tab |>
distinct(diag_col, global_score_median_diag),
aes(yintercept = global_score_median_diag),
color="grey90")+
geom_line(data = reference_line, color="grey30", linewidth=0.5, inherit.aes=TRUE) +
geom_hline(data = global_rank_tab |>
distinct(diag_col, global_score_median_diag),
aes(yintercept = global_score_median_diag),
color="grey80")+
geom_point(aes(color=diag_col), alpha=0.8, size=1.5) +
#geom_point(aes(x=global_rank_median_diag, y=global_score_median_diag), color="grey80",size=1.5, shape=21) +
facet_wrap(~diag_col, ncol=4) +
scale_alpha_manual(values=c(rep(0.5, length(diagSelected)))) +
scale_color_manual(values=colors_diag_IGHV)+
labs(title="", y="Sum of drug scores", x="Sample rank") +
theme_bw() +
guides(alpha = "none", color="none")+
theme(legend.position = "none",
axis.text.x = element_text(angle = 0, color="black", size=8),
panel.grid = element_line(colour = "white"),
axis.text.y = element_text(color="black", size=8),
axis.title.y = element_text(size=8),
axis.title.x = element_text(size=8),
panel.border = element_rect(color = "black", linewidth = 1, fill = NA),
strip.background = element_rect(color = "black", linewidth = 1, fill = "grey90"),
plot.title = element_text(hjust=0.5, size=8, face="bold"),
strip.text = element_text(size = 8),
plot.margin = margin(l = 0.5, r = 1, unit = "cm"))
SFig8a <- all_diag + theme(plot.margin = margin(t=1, l=1, r=1, unit = "cm"))
#multipanel
row3 <- plot_grid(SFig8c, NULL, ncol = 2, rel_widths = c(0.4, 0.6))
SFig.5 <- plot_grid(SFig8a, SFig8b, row3, rel_heights = c(0.2, 0.65, 0.15), ncol = 1,
align = "v", axis = c("l", "r"),
labels=c("A", "B", "C"), label_size=14) & theme(plot.margin=margin(r=1,l=1,t=1,b=1,unit="cm"))
ggsave(path=(paste0(filepath,"figures")), filename = "FigS5.svg", width=42, height=59.4, units = "cm")
ggsave(path=(paste0(filepath,"figures")), filename = "FigS5.pdf", width=42, height=59.4, units = "cm")
ggsave(path=(paste0(filepath,"figures")), filename = "FigS5.png", width=42, height=59.4, units = "cm", dpi=600) #remove later
#prepare data
drugs_show <- c("Ibrutinib", "PF-3758309", "AZD8055", "Venetoclax", "Duvelisib")
drug_targets <- c("Ibrutinib" = "BTK", "PF-3758309" = "PAK", "AZD8055" = "mTOR", "Venetoclax" = "BCL2", "Duvelisib" = "PI3K")
screen_long_selected_drugs <- screen_long |>
filter(drug %in% drugs_show) |>
group_by(drug, diagnosis, conc_index) |>
mutate(mean_viab=mean(viability),
std.error=std.error(viability))
screen_long_selected_drugs$concentration <- as.character(screen_long_selected_drugs$concentration)
#show drug-response for BTK, PAK and mTOR inhibitor
#helper function for plots
viab_plot_list <- list()
viab_plot_list <- map(drugs_show, ~{
screen_long_selected_drugs |>
filter(drug == .x, diagnosis %in% diagnoses_show) |>
ggplot(aes(x=concentration, y=mean_viab, group=diagnosis, color=diagnosis)) +
geom_errorbar(aes(ymin=mean_viab-std.error, ymax=mean_viab+std.error),
width=.5, position=position_dodge(0.01)) +
geom_line() +
theme_figures_angle45_x +
labs(title=paste0(.x, " (", drug_targets[.x], ")"),
y="Viability", x=bquote("Concentration ("*mu*"M)"),
color="Diagnosis") +
scale_y_continuous(breaks=seq(0,1.0, 0.2), limits=c(0,1.1)) +
scale_color_manual(values=colors_diag)
}) |>
set_names(drugs_show)
#combine plots
Fig3e <- viab_plot_list$Ibrutinib + viab_plot_list$`PF-3758309` + viab_plot_list$AZD8055 +
plot_layout(ncol=3, guides = 'collect') &
theme(legend.position='right', legend.title= element_text(size=8, face="bold"),
plot.margin = margin(t=0.5, b=0.5, l=0.5, r=0.5, unit = "cm")) &
guides(
color = guide_legend(ncol = 1),
fill = guide_legend(ncol = 1),
shape = guide_legend(ncol = 1),
linetype = guide_legend(ncol = 1)
)
Fig3e
ggsave(path=(paste0(filepath,"figures")), filename = "Fig3e_viab_btk_pak_mtor.svg")
## Saving 12 x 6 in image
#show drug-response for BCL2 inhibtor (venetoclax)
diagnoses_ven <- c("CLL", "AML", "B-ALL", "T-ALL", "MCL", "Sezary")
screen_long_ven <- screen_long_selected_drugs |>
filter(drug == "Venetoclax", diagnosis %in% diagnoses_ven) |>
group_by(diagnosis, concentration) |>
summarize(med_viab=median(viability))
## `summarise()` has grouped output by 'diagnosis'. You can override using the
## `.groups` argument.
#fit dose-response models
screen_long_ven$concentration <- as.numeric(screen_long_ven$concentration)
models_ven <- map(diagnoses_ven, function(dx) {
data <- screen_long_ven |> filter(diagnosis == dx)
drm(med_viab ~ concentration,
data = data,
fct = LL.4(names = c("hill", "min_value", "max_value", "ec_50")))
})
names(models_ven) <- tolower(diagnoses_ven)
#create predictions based on models
predictions_ven <- map(diagnoses_ven, function(dx) {
data <- screen_long_ven |> filter(diagnosis == dx)
model <- models_ven[[tolower(dx)]]
new_data <- data.frame(
concentration = seq(min(data$concentration), max(data$concentration), length.out = 500)
)
new_data$pred <- predict(model, newdata = new_data)
new_data$diagnosis <- dx
new_data
}) |>
bind_rows()
#calculate IC50 values for all diagnoses
ic50_ven <- do.call(rbind, lapply(diagnoses_ven, function(dx) {
model <- models_ven[[tolower(dx)]]
coefs <- setNames(model$coefficients, c("hill", "min_value", "max_value", "ec_50"))
ic_50 <- with(as.list(coefs),
exp(log(ec_50) + (1 / hill) * log(max_value / (max_value - 2 * min_value))))
data.frame(ic50 = ic_50, diagnosis = dx)
}))
## Warning in log(max_value/(max_value - 2 * min_value)): NaNs produced
#set colors
colors_ven <- c(setNames("darkgrey", "Sezary"), colors_diag)
#order of diagnoses
order_ven <- ic50_ven |>
arrange(ic50) |>
pull(diagnosis)
#plot
data_ven <- screen_long_selected_drugs |>
filter(drug == "Venetoclax", diagnosis %in% diagnoses_ven)
data_ven$diagnosis <- factor(data_ven$diagnosis, levels = order_ven)
data_ven$concentration <- as.numeric(data_ven$concentration)
predictions_ven$diagnosis <- factor(predictions_ven$diagnosis, levels = order_ven)
ic50_ven$diagnosis <- factor(ic50_ven$diagnosis, levels = order_ven)
Fig3f <- data_ven |>
ggplot(aes(x=concentration, y=viability, group=diagnosis, color=diagnosis)) +
geom_boxplot(aes(group=concentration), alpha = 0.8, outlier.shape = NA) +
geom_jitter(aes(group=concentration), alpha=0.2, size=1, width = 0.15) +
facet_wrap(~diagnosis, ncol=6) +
geom_line(data = predictions_ven, aes(x = concentration, y = pred, color = diagnosis))+
geom_vline(data = ic50_ven, aes(xintercept = ic50),
linetype = "dashed", color = "black", size = 0.4) +
scale_x_log10(breaks = c(0.001, 0.01, 0.1, 1),
labels = trans_format("log10", math_format(10^.x)))+
scale_y_continuous(breaks=seq(0,1.0, 0.2), limits=c(0,1.1)) +
theme_figures_facet+
labs(title="Venetoclax (BCL2)", x=bquote("Concentration ("*mu*"M)"), y = "Viability") +
scale_color_manual(values=colors_ven)+
theme(plot.margin = margin(t=1, b=1, r=1, l=1, unit="cm"))
Fig3f
## Warning: Removed 22 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 23 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_vline()`).
ggsave(path=(paste0(filepath,"figures")), filename = "Fig3f_venetoclax.svg")
## Saving 12 x 6 in image
## Warning: Removed 22 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 23 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_vline()`).
#duvelisib
Fig3g <- screen_long_selected_drugs |>
filter(diagnosis %in% c("B-ALL", "T-ALL"), drug == "Duvelisib") |>
ggplot(aes(x=factor(concentration), y=viability, group=interaction(diagnosis, concentration), color=diagnosis)) +
geom_boxplot(alpha = 0.8, outlier.shape = NA)+
geom_jitter(alpha = 0.2, size=1, position = position_jitterdodge())+
labs(title="Duvelisib (PI3K)", y="Viability", x=bquote("Concentration ("*mu*"M)"), color = "Diagnosis")+
scale_y_continuous(breaks=seq(0,1.2, 0.2), limits=c(0,1.2))+
theme_figures_angle45_x+
scale_color_manual(values=colors_diag)+
theme(plot.margin = margin(t=1, b=1, r=1, l=1, unit="cm"))
Fig3g
ggsave(path=(paste0(filepath,"figures")), filename = "Fig3g_duvelisib.svg")
## Saving 12 x 6 in image
#cobimetinib (...)
#quizartinib (...)
## Warning: Removed 22 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 22 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_vline()`).
## `summarise()` has grouped output by 'patientID'. You can override using the
## `.groups` argument.
## Warning in text.default(pos.xlabel[, 1], pos.xlabel[, 2], newcolnames, srt =
## tl.srt, : "cl.col" is not a graphical parameter
## Warning in text.default(pos.ylabel[, 1], pos.ylabel[, 2], newrownames, col =
## tl.col, : "cl.col" is not a graphical parameter
## Warning in title(title, ...): "cl.col" is not a graphical parameter
## `summarise()` has grouped output by 'patientID'. You can override using the
## `.groups` argument.
## Warning in text.default(pos.xlabel[, 1], pos.xlabel[, 2], newcolnames, srt =
## tl.srt, : "cl.col" is not a graphical parameter
## Warning in text.default(pos.ylabel[, 1], pos.ylabel[, 2], newrownames, col =
## tl.col, : "cl.col" is not a graphical parameter
## Warning in title(title, ...): "cl.col" is not a graphical parameter
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's fill values.
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's fill values.
## Warning in as_grob.default(plot): Cannot convert object of class NULL into a
## grob.
#select drugs
drugs_show_corr <- c("SCH772984", "Cobimetinib", "Ibrutinib", "Idelalisib", "Cobimetinib", "Rapamycin")
#helper function for CLL
corr_drugs_cll <- screen_means |>
filter(diagnosis == c("CLL"), drug %in% drugs_show_corr) |>
pivot_wider(names_from = drug, values_from = mean_viab)
corr_drugs_aml <- screen_means |>
filter(diagnosis == c("AML"), drug %in% drugs_show_corr) |>
pivot_wider(names_from = drug, values_from = mean_viab)
corr_drugs <- rbind(corr_drugs_cll, corr_drugs_aml)
plot_corr <- function(data, x, y, xlab, ylab) {
corr_labels <- data |>
group_by(diagnosis) |>
summarize(corr=cor(.data[[x]], .data[[y]], use = "complete.obs"),
.groups = "drop")
ggplot(data, aes_string(x = x, y = y)) +
geom_point(aes(color = diagnosis), alpha = 0.5) +
geom_smooth(method = "lm", se = TRUE,
color = "black", fill = "lightgrey") +
labs(x = xlab, y = ylab, color="Diagnosis")+
facet_wrap(~diagnosis)+
theme_figures +
theme(strip.background = element_blank(),
strip.text.x = element_blank())+
geom_text(data = corr_labels, aes(x = 0.8, y = 1.1,
label = paste0("italic(R) == ", round(corr, 2))), size=3, parse = TRUE) +
xlim(0.4,1.2) + ylim(0.4,1.2) +
scale_color_manual(values = colors_diag)
}
#plot
p1 <- plot_corr(corr_drugs, "SCH772984", "Cobimetinib", xlab="SCH772984 (ERK)", ylab= "Cobimetinib (MEK)")
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
p2 <- plot_corr(corr_drugs, "Ibrutinib", "Idelalisib", xlab="Ibrutinib (BTK)", ylab= "Idelalisib (PI3K)")
p3 <- plot_corr(corr_drugs, "Ibrutinib", "Cobimetinib", xlab="Ibrutinib (BTK)", ylab= "Cobimetinib (MEK)")
p4 <- plot_corr(corr_drugs, "Rapamycin", "Idelalisib", xlab="Rapamycin (mTOR)", ylab= "Idelalisib (PI3K)")
Fig4b <- p1+plot_spacer()+p2+plot_spacer()+p3+plot_spacer()+p4+plot_layout(ncol=7, widths = c(4, 0.5, 4, 0.5, 4, 0.5, 4), guides = 'collect') & theme(legend.position='right', plot.margin = margin(t=1, l=0.5, unit = "cm"))
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
#prepare data
#pTab <- ... (adapt from Junyans script)
pTab <- read.csv(paste0(filepath,"data/pTable_drug_VS_gene.csv"), sep=";") |>
mutate(across(c(p, p.adj, diff), ~as.numeric(gsub(",", ".", as.character(.)))))
#number of significant associations per gene (10% FDR)
plotTab1 <- filter(pTab, p.adj <= 0.1) %>% group_by(gene) %>%
summarise(number = length(drug)) %>%
arrange(desc(number)) %>% mutate(gene = factor(gene, levels = gene))
#p value scatter plot (IGHV not shown)
top_genes <- as.vector(unlist(plotTab1[2:8,"gene"]))
showGenes <- sort(top_genes)
append(showGenes, "other")
## [1] "DDX3X" "del11q" "del13q" "del17p" "gain19p" "TP53"
## [7] "trisomy12" "other"
#define colors for each gene
library(RColorBrewer)
colorList <- c(colorRampPalette(brewer.pal(7, 'Set2'))(7),
"grey80", "grey90")
names(colorList) = c(showGenes, "other", "Below 10% FDR")
colorList["del17p"] <- "#FC8D62"
colorList["del11q"] <- "#E5C494"
colorList["trisomy12"] <- "#FFD92F"
colorList["DDX3X"] <- "#E78AC3"
colorList["TP53"] <- "#66C2A5"
#define shapes for viability
shapeList <- ifelse(names(colorList) %in% showGenes, 19,1)
names(shapeList) <- names(colorList)
shapeList["other"] <- 19
shapeList <- c(1, 25, 24)
names(shapeList) = c("Below 10% FDR", "Down", "Up")
#order drugs by their association similarity
pMat <- dplyr::select(pTab, drug, gene, p) %>%
filter(gene!="IGHV.status") %>%
spread(key = gene, value = p) %>%
column_to_rownames("drug") %>%
as.matrix()
hc <- hclust(dist(pMat), method = "ward.D2")
drugOrder <- rownames(pMat)[hc$order]
#order drugs by p value
pval_list <- pTab |>
filter(gene != "IGHV.status") |>
group_by(drug) |>
summarize(pval = min(p)) |>
arrange(pval, descending = FALSE)
drugOrder2 <- pval_list$drug
#prepare plot table
plotTab <- pTab %>% filter(gene != "IGHV.status") %>%
mutate(gene = ifelse(gene %in% showGenes, gene, "other")) %>%
mutate(gene_FDR = ifelse(p.adj < 0.1, gene, "Below 10% FDR"),
drug = factor(drug, levels = drugOrder2),
gene = factor(gene, levels = names(colorList)),
direction = ifelse(gene_FDR == "Below 10% FDR", "Below 10% FDR", ifelse(sign(diff)>0, "Down", "Up")))
#plot
Fig5a <- ggplot(plotTab, aes(x=drug, y = -log10(p), color = gene, shape = direction)) +
geom_point(size =2, alpha = 0.8, stroke = 1) + scale_color_manual(values = colorList) +
scale_shape_manual(values = shapeList, name = "Viability effect") + theme_bw() +
labs(x="", y = expression(-log[10]*italic(P)), color = "Genetic aberration")+
theme_figures_border+
theme(axis.text.x = element_text(angle=45, hjust=1, vjust=1),
panel.grid.major.x = element_line(size=.1, color="grey50",
linetype = "dotted"),
panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank(),
legend.position = c(.95, .95),
legend.justification = c("right", "top"),
legend.box.just = "right",
legend.box = "horizontal",
legend.margin = margin(6, 6, 6, 6))+
geom_hline(yintercept=-log10(0.004706046), linetype = "dashed", linewidth=0.3, color="black")+
annotate("text", label="FDR 10%", size=3, x=60, y=3)+
guides(shape = guide_legend(order = 1),
color = guide_legend(order = 2))
Fig5a <- Fig5a + theme(plot.margin = margin(t=1, l=1, r=1, unit = "cm"))
Fig5a
ggsave(path=(paste0(filepath,"figures")), filename = "Fig5a_pvalue_scatterplot.svg")
## Saving 18 x 9 in image
#IGHV
#perform t-tests for each drug and concentration level
p_values <- screen_long |>
filter(diagnosis == "CLL") |>
merge(metadata, by.x="patientID", by.y = "Patient.ID") |>
filter(!(is.na(IGHV.status))) |>
group_by(drug, conc_index) |>
summarize(tTest(viability, IGHV.status), .groups = "drop") |>
rename("meanU" = "mean1", "meanM" = "mean2") |>
ungroup() |>
mutate(p.adj = p.adjust(p, method = "BH"),
neg_log10_p = -log10(as.vector(p)))
#calculate overall mean viability difference in percent per drug between groups for plot
diffviab_overall <- screen_means |>
filter(diagnosis == "CLL") |>
merge(metadata, by.x="patientID", by.y = "Patient.ID") |>
filter(!(is.na(IGHV.status))) |>
group_by(drug, IGHV.status) |>
summarize(overall_mean = mean(mean_viab), .groups = "drop") |>
pivot_wider(names_from = "IGHV.status", values_from = "overall_mean") |>
mutate(overall_diff_viab = (U-M)/(U)*100)
#add p values (mean) and number of signifiantly different concentration levels per drug
diffviab_p_values_overall <- p_values |>
mutate(count = case_when(p.adj < 0.1 ~ 1, TRUE ~ 0)) |>
group_by(drug) |>
summarize(logpval = mean(neg_log10_p), sign_counts = sum(count)) |>
left_join(diffviab_overall, by="drug") |>
merge(drugs[,c("Drug", "Pathway_mod2")], by.x="drug", by.y="Drug")
#define order
order_pway2 <- sort(unique(diffviab_p_values_overall$Pathway_mod2))
order_pway2 <- c(order_pway2[order_pway2 != "other"], "other")
diffviab_p_values_overall$Pathway_mod2 <- factor(diffviab_p_values_overall$Pathway_mod2, levels = order_pway2)
#plot
set.seed(123)
volcano_ighv <- ggplot(diffviab_p_values_overall, aes(x = overall_diff_viab, y = logpval, label=drug)) +
geom_hline(yintercept = -log10(0.05),
linetype = "dashed",
color = "black") +
geom_vline(xintercept = 0,
color = "black") +
geom_point(aes(color=Pathway_mod2, size=sign_counts), alpha = 0.8) +
geom_text_repel(
text.padding = 0.5,
box.padding = 0.5,
max.overlaps = 10,
min.segment.length = 2,
size=3)+
theme_figures+
labs(
title = "IGHV",
x = "Difference in mean viability (%)",
y = expression(-log[10]*italic(P)),
color="Pathway", size="Number of sign. \ndifferent concentrations") +
scale_x_continuous(limits = c(-20, 20)) +
scale_color_manual(values=colors_pathways_mod2) +
guides(color = guide_legend(override.aes = list(size = 4)))+
annotate("text", label="more active\nin U-CLL", y=20, x=-10, size=3)+
annotate("text", label="more active\nin M-CLL", y=20, x=10, size=3)
## Warning in geom_text_repel(text.padding = 0.5, box.padding = 0.5, max.overlaps
## = 10, : Ignoring unknown parameters: `text.padding`
Fig5b_top <- volcano_ighv + theme(plot.margin = margin(t=1, l=1, unit = "cm"))
Fig5b_top
## Warning: ggrepel: 49 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
#trisomy12
#perform t-tests for each drug and concentration level
p_values_tri12 <- screen_long |>
filter(diagnosis == "CLL") |>
merge(metadata, by.x="patientID", by.y = "Patient.ID") |>
filter(!(is.na(trisomy12))) |>
group_by(drug, conc_index) |>
summarize(tTest(viability, trisomy12), .groups = "drop") |>
rename("mean_mut" = "mean1", "mean_unmut" = "mean2") |>
ungroup() |>
mutate(p.adj = p.adjust(p, method = "BH"),
neg_log10_p = -log10(as.vector(p)))
#calculate overall mean viability difference in percent per drug between groups for plot
diffviab_overall_tri12 <- screen_means |>
filter(diagnosis == "CLL") |>
merge(metadata, by.x="patientID", by.y = "Patient.ID") |>
filter(!(is.na(trisomy12))) |>
group_by(drug, trisomy12) |>
summarize(overall_mean = mean(mean_viab), .groups = "drop") |>
pivot_wider(names_from = "trisomy12", values_from = "overall_mean") |>
rename("mut" = "1", "unmut" = "0") |>
mutate(overall_diff_viab = (mut-unmut)/(mut)*100)
#add p values (mean) and number of signifiantly different concentration levels per drug
diffviab_p_values_overall_tri12 <- p_values_tri12 |>
mutate(count = case_when(p.adj < 0.1 ~ 1, TRUE ~ 0)) |>
group_by(drug) |>
summarize(logpval = mean(neg_log10_p), sign_counts = sum(count)) |>
left_join(diffviab_overall_tri12, by="drug") |>
merge(drugs[,c("Drug", "Pathway_mod2")], by.x="drug", by.y="Drug")
#plot
diffviab_p_values_overall_tri12$Pathway_mod2 <- factor(diffviab_p_values_overall_tri12$Pathway_mod2, levels = order_pway2)
volcano_tri12 <- ggplot(diffviab_p_values_overall_tri12, aes(x = overall_diff_viab, y = logpval, label=drug)) +
geom_hline(yintercept = -log10(0.05),
linetype = "dashed",
color = "black") +
geom_vline(xintercept = 0,
color = "black") +
geom_point(aes(color=Pathway_mod2, size=sign_counts), alpha = 0.8) +
geom_text_repel(
text.padding = 0.5,
box.padding = 0.5,
max.overlaps = 10,
min.segment.length = 2,
size=3)+
theme_figures+
labs(
title = "trisomy12",
x = "Difference in mean viability (%)",
y = expression(-log[10]*italic(P)),
color="Pathway", size="Number of sign. \ndifferent concentrations") +
scale_x_continuous(limits = c(-20, 20)) +
scale_color_manual(values=colors_pathways_mod2) +
guides(color = guide_legend(override.aes = list(size = 4)))+
annotate("text", label="more active\nin trisomy12", y=15, x=-10, size=3)+
annotate("text", label="more active\nin wt", y=15, x=10, size=3)
## Warning in geom_text_repel(text.padding = 0.5, box.padding = 0.5, max.overlaps
## = 10, : Ignoring unknown parameters: `text.padding`
Fig5b_bottom <- volcano_tri12 + theme(plot.margin = margin(t=1, l=1, unit = "cm"))
Fig5b_bottom
## Warning: ggrepel: 53 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
#TP53
#perform t-tests for each drug and concentration level
p_values_tp53 <- screen_long |>
filter(diagnosis == "CLL") |>
merge(metadata, by.x="patientID", by.y = "Patient.ID") |>
filter(!(is.na(TP53))) |>
group_by(drug, conc_index) |>
summarize(tTest(viability, TP53), .groups = "drop") |>
rename("mean_mut" = "mean1", "mean_unmut" = "mean2") |>
ungroup() |>
mutate(p.adj = p.adjust(p, method = "BH"),
neg_log10_p = -log10(as.vector(p)))
#calculate overall mean viability difference in percent per drug between groups for plot
diffviab_overall_tp53 <- screen_means |>
filter(diagnosis == "CLL") |>
merge(metadata, by.x="patientID", by.y = "Patient.ID") |>
filter(!(is.na(TP53))) |>
group_by(drug, TP53) |>
summarize(overall_mean = mean(mean_viab), .groups = "drop") |>
pivot_wider(names_from = "TP53", values_from = "overall_mean") |>
rename("mut" = "1", "unmut" = "0") |>
mutate(overall_diff_viab = (mut-unmut)/(mut)*100)
#add p values (mean) and number of signifiantly different concentration levels per drug
diffviab_p_values_overall_tp53 <- p_values_tp53 |>
mutate(count = case_when(p.adj < 0.1 ~ 1, TRUE ~ 0)) |>
group_by(drug) |>
summarize(logpval = mean(neg_log10_p), sign_counts = sum(count)) |>
left_join(diffviab_overall_tp53, by="drug") |>
merge(drugs[,c("Drug", "Pathway_mod2")], by.x="drug", by.y="Drug")
#plot
diffviab_p_values_overall_tp53$Pathway_mod2 <- factor(diffviab_p_values_overall_tp53$Pathway_mod2, levels = order_pway2)
volcano_tp53 <- ggplot(diffviab_p_values_overall_tp53, aes(x = overall_diff_viab, y = logpval, label=drug)) +
geom_hline(yintercept = -log10(0.05),
linetype = "dashed",
color = "black") +
geom_vline(xintercept = 0,
color = "black") +
geom_point(aes(color=Pathway_mod2, size=sign_counts), alpha = 0.8) +
geom_text_repel(
text.padding = 0.5,
box.padding = 0.5,
max.overlaps = 10,
min.segment.length = 2,
size=3)+
theme_figures+
labs(
title = "TP53",
x = "Difference in mean viability (%)",
y = expression(-log[10]*italic(P)),
color="Pathway", size="Number of sign. \ndifferent concentrations") +
scale_x_continuous(limits = c(-15, 15)) +
scale_color_manual(values=colors_pathways_mod2) +
guides(color = guide_legend(override.aes = list(size = 4)))+
annotate("text", label="more active\nin mutated", y=8, x=-7.5, size=3)+
annotate("text", label="more active\nin wt", y=8, x=7.5, size=3)
## Warning in geom_text_repel(text.padding = 0.5, box.padding = 0.5, max.overlaps
## = 10, : Ignoring unknown parameters: `text.padding`
FigS7f <- volcano_tp53 + theme(plot.margin = margin(t=1, l=1, unit = "cm"))
FigS7f
## Warning: ggrepel: 50 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
#select B-cell receptor inhibitors, show response for U-CLL
drugs_bcr <- c("Ibrutinib", "ONO-4059", "PRT062607")
df_bcr <- screen_long |>
filter(diagnosis == "CLL", drug %in% drugs_bcr) |>
merge(metadata[,c("Patient.ID", "IGHV.status", "DDX3X")], by.x = "patientID", by.y = "Patient.ID") |>
filter(IGHV.status == "U", !is.na(DDX3X))
#plot
bcr_ddx3x <- df_bcr |>
mutate(drug = case_when(drug == "Ibrutinib" ~ "Ibrutinib (BTK)",
drug == "ONO-4059" ~ "ONO-4059 (BTK)",
drug == "PRT062607" ~ "PRT062607 (SYK)",
TRUE ~ drug)) |>
ggplot(aes(x=factor(concentration), y=viability, color=DDX3X)) +
geom_boxplot(alpha = 0.3, width = 0.5, position=position_dodge(0.75), outlier.shape = NA) +
geom_jitter(alpha = 0.25, position = position_jitterdodge()) +
facet_wrap(.~drug) +
theme_figures_facet_angle45_x_legend+
stat_compare_means(aes(group = DDX3X),
method = "t.test",
label = "p.signif",
label.y = 1.1,
size=3)+
labs(y = "Viability", x = bquote("Concentration ("*mu*"M)"), title ="U-CLL")+
scale_color_manual(values=colors_ddx3x, labels=c("unmutated", "mutated")) +
scale_y_continuous(breaks=seq(0,1.0, 0.2), limits=c(0,1.15)) +
guides(color = guide_legend(override.aes = list(label = "")))
Fig5c <- bcr_ddx3x + theme(plot.margin = margin(t=0.5, l=1, r=1, b=1, unit = "cm"))
Fig5c
#select ibrutinib and idelalisb, show response for U-CLL
drugs_ddx3x <- c("Ibrutinib", "Idelalisib")
btk_pi3k <- screen_long |>
filter(diagnosis == "CLL", drug %in% drugs_ddx3x) |>
merge(metadata[,c("Patient.ID", "IGHV.status", "DDX3X")], by.x = "patientID", by.y = "Patient.ID") |>
filter(IGHV.status == "U", !is.na(DDX3X)) |>
mutate(drug = case_when(drug == "Ibrutinib" ~ "Ibrutinib (BTK)", drug == "Idelalisib" ~ "Idelalisib (PI3K)")) |>
pivot_wider(names_from = "drug", values_from = "viability")
#plot
btk_pi3k_ddx3x <- btk_pi3k |>
ggplot(aes(x=`Idelalisib (PI3K)`, y=`Ibrutinib (BTK)`, color=DDX3X)) +
geom_point(alpha = 0.75) +
geom_abline(slope=1, linetype="dashed")+
scale_color_manual(values=colors_ddx3x, labels=c("unmutated", "mutated"))+
theme_figures_facet+
scale_y_continuous(breaks=seq(0.4,1.0, 0.2), limits=c(0.3,1.1)) +
scale_x_continuous(breaks=seq(0.4,1.0, 0.2), limits=c(0.3,1.1)) +
guides(color = guide_legend(override.aes = list(label = ""))) +
facet_wrap(~concentration, ncol=5)+
labs(title=expression(bold(paste("Concentration (", mu, "M)"))), x="Idelalisib (PI3K)", y= "Ibrutinib (BTK)")
SFig11b <- btk_pi3k_ddx3x + theme(plot.margin = margin(l = 1, r = 1, t=1, b=1.5, unit = "cm"))
SFig11b
## `summarise()` has grouped output by 'patientID'. You can override using the
## `.groups` argument.
## Warning: There were 5 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `HIPO.ID = .Primitive("as.integer")(HIPO.ID)`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 4 remaining warnings.
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning in cor(as.vector(y), as.vector(y.pred)): the standard deviation is zero
## Warning: `as.tibble()` was deprecated in tibble 2.0.0.
## ℹ Please use `as_tibble()` instead.
## ℹ The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `guide` argument in `scale_*()` cannot be `FALSE`. This was deprecated in
## ggplot2 3.3.4.
## ℹ Please use "none" instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
#select B-cell receptor inhibitors
del11q <- screen_means |>
filter(diagnosis == "CLL", drug %in% drugs_bcr) |>
merge(metadata[,c("Patient.ID", "IGHV.status", "del11q")], by.x = "patientID", by.y = "Patient.ID") |>
filter(!is.na(IGHV.status), !is.na(del11q)) |>
group_by(patientID, drug) |>
mutate(drug = case_when(drug == "Ibrutinib" ~ "Ibrutinib (BTK)", drug == "ONO-4059" ~ "ONO-4059 (BTK)", drug == "PRT062607" ~ "PRT062607 (SYK)"))
del11q_ighv <- del11q |>
filter(drug == "Ibrutinib (BTK)") |>
ggplot(aes(x=del11q, y=mean_viab, color=del11q)) +
geom_boxplot(alpha = 1, width = 0.5, position=position_dodge(0.75), outlier.shape = NA) +
geom_jitter(aes(shape=IGHV.status), width = 0.1, alpha = 0.75) +
scale_color_manual(values=colors_del11q, labels=c("unmutated", "mutated")) +
scale_shape_discrete(name="IGHV", labels=c("unmutated", "mutated")) +
facet_wrap(~drug)+
theme_figures_facet+
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
legend.position = "right",
legend.title = element_text(size=8, face="bold"),
legend.text=element_text(size=8),
legend.key.size = unit(0.5, 'cm'))+
labs(y = "Mean viability", x="", title="CLL") +
stat_compare_means(aes(group = del11q), method = "t.test", label = "p.signif", label.x.npc= "middle", label.y = 1.05, size=3)+
scale_y_continuous(breaks=seq(0.4,1.0, 0.2), limits=c(0.4,1.1)) +
guides(color = guide_legend(override.aes = list(label = "")))
SFig10a <- del11q_ighv + theme(plot.margin = margin(l = 1, r = 1, t=1, unit = "cm"))
SFig10a
#show effect of B-cell receptor inhibitors in U-CLL only
del11q_ucll <- del11q |>
filter(IGHV.status == "U") |>
ggplot(aes(x=del11q, y=mean_viab, color=del11q)) +
geom_boxplot(alpha = 1, width = 0.5, position=position_dodge(0.75), outlier.shape = NA) +
geom_jitter(aes(shape=IGHV.status), width = 0.1, alpha = 0.75) +
scale_color_manual(values=colors_del11q, labels=c("unmutated", "mutated")) +
facet_wrap(~drug)+
theme_figures_facet+
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
legend.position = "right",
legend.title = element_text(size=8, face="bold"),
legend.text=element_text(size=8),
legend.key.size = unit(0.5, 'cm'))+
labs(y = "Mean viability", x="", title="U-CLL") +
stat_compare_means(aes(group = del11q), method = "t.test", label = "p.signif", label.x.npc= "middle", label.y = 1.05, size=3)+
scale_y_continuous(breaks=seq(0.4,1.0, 0.2), limits=c(0.4,1.1)) +
guides(color = guide_legend(override.aes = list(label = "")), shape = "none")
SFig10b <- del11q_ucll + theme(plot.margin = margin(l = 1, r = 1, t=1, unit = "cm"))
SFig10b
#DDX3X mutations in CPS1000
ddx3x_table
## # A tibble: 8 × 8
## ID Sex IGHV `DNA mutation` `Amino acid alteration` Exon
## <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 P0293 M U c.845dupG p.C282fs 9
## 2 P0538 M U c.1115_1122del p.E372fs 10
## 3 P0696 F U c.A991G p.M331V 10
## 4 P0711 M U c.C1229G p.S410C 12
## 5 P0880 F U c.C1184T p.T395I 11
## 6 P0987 M U c.C1120T p.Q374X 10
## 7 P1027 M U c.T1256G p.L419R 11
## 8 P1075 M U c.C625T p.Q209X 6
## # ℹ 2 more variables: `Type of alteration` <chr>, `Allele frequency` <dbl>
ddx3x_table$`Allele frequency` <- round(ddx3x_table$`Allele frequency`,2)
#create table
ft <- flextable(ddx3x_table) |>
theme_alafoli()
ft <- ft |>
flextable::fontsize(size = 8, part = "header") |>
flextable::fontsize(size = 8, part = "body") |>
flextable::padding(padding.top = 3, padding.bottom = 3, part = "all") |>
flextable::align(align = "center", part = "all") |>
flextable::bold(part = "header") |>
flextable::width(j=c(1,2,3,6,8), width = 0.1) |>
flextable::width(j=c(4,7), width = 0.25) |>
flextable::width(j=c(5), width = 0.5) |>
flextable::line_spacing(space = 1.15, part = "all") |>
flextable::color(color = "black", part = "all") |>
#add header with Greek letter mu (μ)
flextable::vline(j = "ID")
SFig11a <- gen_grob(ft, fit = "width", scaling = "min", hjust = 0)
#prepare data
mutations <- ddx3x_cps1000[,c("position","mutation", "type_adjusted")]
mutations$frequency <- c(rep(1,times=8))
ddx3x_literature <- ddx3x_literature |>
group_by(position) |>
mutate(count = n())
mutations_lit <- ddx3x_literature[,c("position","mutation", "type_adjusted")]
mutations_lit$frequency <- ddx3x_literature$count*(-1)
mutations <- rbind(mutations, mutations_lit)
mutations <- mutations |>
mutate(type_adjusted = case_when(
type_adjusted == "multiple" ~ "Multiple",
TRUE ~ type_adjusted
))
#combine mutations into one label
mutations_combined <- mutations %>%
group_by(position, frequency) %>%
summarise(
mutations = paste(unique(mutation), collapse = ", "),
.groups = "drop"
)
#protein domain data (provide source)
domains <- data.frame(
start = c(182, 414),
end = c(404, 544),
domain = c("Helicase domain 1", "Helicase domain 2")
)
RNA_domains <- data.frame(
start = c(274, 302, 324, 445, 471, 495),
end = c(280, 303, 342, 450, 480, 504),
domain = c("RNA binding")
)
ATP_domains <- data.frame(
start = c(343, 525),
end = c(352, 536),
domain = c("ATP binding")
)
color_data <- rbind(domains,ATP_domains,RNA_domains)
#protein length
protein_length <- 662
#set colors
mycolors_type <- colorRampPalette(brewer.pal(6, "Set2"))(6)
names(mycolors_type) <- unique(mutations$type_adjusted)
mycolors_domain <- colorRampPalette(brewer.pal(4, "Set3"))(4)
mycolors_domain<- alpha(mycolors_domain, 1)
names(mycolors_domain) <- c("Helicase domain 1", "Helicase domain 2", "ATP binding", "RNA binding")
#plot
lollipop <- ggplot() +
#add mutation lollipops
geom_segment(data = mutations,
aes(x = position, xend = position,
y = 0, yend = frequency),
color = "grey50", alpha = 0.5, size = 0.25) +
geom_point(data = mutations,
aes(x = position, y = frequency, color = type_adjusted),
size = 2) +
#base rectangle representing the gene
statebins:::geom_rrect(aes(xmin = 0, xmax = protein_length,
ymin = -0.25, ymax = 0.25),
fill = "white", color = "black", size=0.4) +
#add protein domains as colored rectangles
geom_rect(data = domains,
aes(xmin = start, xmax = end,
ymin = -0.25, ymax = 0.25, fill = domain),
color = "white", size=0.25) +
#add RNA binding domains
geom_rect(data = RNA_domains,
aes(xmin = start, xmax = end,
ymin = -0.25, ymax = 0.25,
fill = domain),
color = "white", size=0.25) +
#add ATP binding domains
geom_rect(data = ATP_domains,
aes(xmin = start, xmax = end,
ymin = -0.25, ymax = 0.25,
fill = domain),
color = "white", size=0.25) +
#add base rectangle with transparent fill
statebins:::geom_rrect(aes(xmin = 0, xmax = protein_length,
ymin = -0.25, ymax = 0.25),
fill = "transparent", color = "black", size=0.4) +
#add mutation labels
geom_text_repel(data = mutations_combined,
aes(x = position, y = frequency,
label = mutations),
size = 3,
color = "black",
angle = 90,
force = 0.1,
force_pull = 0.1,
point.padding = 0.5,
box.padding = 0.3,
min.segment.length = 0,
segment.color = "grey80",
segment.alpha = 0.5,
segment.size = 0.25,
nudge_y = ifelse(mutations_combined$frequency < 1, -1, 0.5),
max.overlaps = Inf,
max.iter = 3000)+
#add domain labels
geom_text(data = color_data,
aes(x = (start + end)/2, y = 0,
label = "")) +
#labels
labs(
x = "Amino acid position", y = "Number of mutations", color = "Type of mutation", fill= "Domain", title = "DDX3X"
) +
#customize theme
theme_minimal() +
theme(panel.grid = element_blank(),
axis.text = element_text(size = 8, color = "black"),
axis.title.y = element_text(size = 8, hjust = 0.45), axis.title.x = element_text(size = 8),
axis.ticks.x = element_line(color = "black", size = 0.5), # Add x-axis ticks
axis.ticks.y = element_line(color = "black", size = 0.5), # Add y-axis ticks
axis.ticks.length = unit(5, "pt"), # Set the length of tick marks
legend.position = "right",
legend.title = element_text(size = 8, face="bold"),
legend.key.size = unit(0.5, 'cm'),
plot.title = element_text(hjust=0.5, size=8, face="bold")
) +
#scale adjustments
scale_y_continuous(
expand = c(0, 0), limits = c(-9, 3), breaks = (seq(-6, -1, 1)), labels = c(6,5,4,3,2,1))+
scale_x_continuous(limits = (c(-20,680)), breaks = c(1,100,200,300,400,500,600,662), expand = (c(0,0))) +
geom_segment(aes(x = 1, y = -9, xend = 662, yend = -9), color = "black", size = 0.75)+
geom_segment(aes(x = -20, y = -6.015, xend = -20, yend = -0.985), color = "black", size = 0.75)+
scale_color_manual(values=mycolors_type)+
scale_fill_manual(values=mycolors_domain)+
guides(fill = guide_legend(override.aes = list(colour = NA)))
Fig5e <- lollipop + theme(plot.margin = margin(t=1, l=1, r=1, unit = "cm"))
Fig5e
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 740 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
##
## out of 15127 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up) : 13, 0.086%
## LFC < 0 (down) : 4, 0.026%
## outliers [1] : 73, 0.48%
## low counts [2] : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
## [1] 17
## [1] "Intercept" "condition_cps_unmutated_vs_mutated"
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## log2 fold change (MAP): condition cps unmutated vs mutated
## Wald test p-value: condition cps unmutated vs mutated
## DataFrame with 15127 rows and 5 columns
## baseMean log2FoldChange lfcSE pvalue padj
## <numeric> <numeric> <numeric> <numeric> <numeric>
## ENSG00000000003 1.9101 0.00289367 0.0846446 0.63193053 0.903934
## ENSG00000000419 2258.7670 0.36187463 0.3585549 0.00449449 0.393372
## ENSG00000000457 1022.1373 -0.02142053 0.0837562 0.39599811 0.805297
## ENSG00000000460 1218.4487 -0.00499457 0.0824318 0.79740023 0.957569
## ENSG00000000938 29787.4417 -0.01781967 0.0846137 0.39643908 0.805324
## ... ... ... ... ... ...
## ENSG00000273045 62.32308 -0.000905656 0.0817699 0.962356 0.994221
## ENSG00000273079 19.23316 0.004005899 0.0849180 0.110130 0.603490
## ENSG00000273155 2.72231 0.002674422 0.0842020 0.799143 0.957902
## ENSG00000273173 98.67154 -0.009399929 0.0833434 0.594818 0.892583
## ENSG00000273294 8.97583 0.008131371 0.0835206 0.641268 0.908161
## pathway pval padj log2err ES NES size
## <char> <num> <num> <num> <num> <num> <int>
## 1: HALLMARK_TNFA_SIGNALING_VIA_NFKB 5.41e-14 2.70e-12 0.965 0.523 2.25 188
## 2: HALLMARK_MYC_TARGETS_V1 2.48e-13 6.20e-12 0.933 0.501 2.18 199
## 3: HALLMARK_G2M_CHECKPOINT 6.21e-08 1.04e-06 0.705 0.438 1.91 196
## 4: HALLMARK_INFLAMMATORY_RESPONSE 1.05e-06 1.31e-05 0.644 0.430 1.84 171
## 5: HALLMARK_E2F_TARGETS 1.56e-06 1.56e-05 0.644 0.413 1.80 199
## 6: HALLMARK_KRAS_SIGNALING_UP 2.57e-06 2.15e-05 0.627 0.432 1.84 156
## leadingEdge
## <list>
## 1: BTG3, G0....
## 2: MCM2, SN....
## 3: TOP2A, M....
## 4: NLRP3, C....
## 5: ATAD2, C....
## 6: G0S2, MA....
## New names:
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`
## • `` -> `...22`
## • `` -> `...23`
## • `` -> `...24`
## • `` -> `...25`
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning in cor(cd3e_expr_knis, x, use = "complete.obs"): the standard deviation
## is zero
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(ucll_knis)
##
## # Now:
## data %>% select(all_of(ucll_knis))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 2321 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
##
## out of 16735 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up) : 45, 0.27%
## LFC < 0 (down) : 13, 0.078%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
## [1] 58
## [1] "Intercept" "condition_knis_unmutated_vs_mutated"
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## Warning in nbinomGLM(x = x, Y = YNZ, size = size, weights = weightsNZ, offset =
## offsetNZ, : the line search routine failed, unable to sufficiently decrease the
## function value
## Warning in nbinomGLM(x = x, Y = YNZ, size = size, weights = weightsNZ, offset =
## offsetNZ, : the line search routine failed, unable to sufficiently decrease the
## function value
## Warning in nbinomGLM(x = x, Y = YNZ, size = size, weights = weightsNZ, offset =
## offsetNZ, : the line search routine failed, unable to sufficiently decrease the
## function value
## Warning in nbinomGLM(x = x, Y = YNZ, size = size, weights = weightsNZ, offset =
## offsetNZ, : the line search routine failed, unable to sufficiently decrease the
## function value
## log2 fold change (MAP): condition knis unmutated vs mutated
## Wald test p-value: condition knis unmutated vs mutated
## DataFrame with 16735 rows and 5 columns
## baseMean log2FoldChange lfcSE pvalue padj
## <numeric> <numeric> <numeric> <numeric> <numeric>
## ENSG00000186092 1.67207 8.91947e-06 0.00144270 0.595549 0.998109
## ENSG00000187634 267.57342 1.97106e-06 0.00144264 0.849780 0.998109
## ENSG00000188976 3313.36386 7.83263e-06 0.00144255 0.380360 0.998109
## ENSG00000187961 311.10937 2.16735e-06 0.00144268 0.482642 0.998109
## ENSG00000187583 27.53665 7.18020e-07 0.00144268 0.803700 0.998109
## ... ... ... ... ... ...
## ENSG00000212907 90027.8 5.35529e-06 0.00144268 0.191985 0.969190
## ENSG00000198886 506468.1 5.87490e-06 0.00144266 0.284578 0.998109
## ENSG00000198786 285982.6 3.94295e-06 0.00144267 0.418751 0.998109
## ENSG00000198695 77486.4 2.17944e-06 0.00144266 0.545266 0.998109
## ENSG00000198727 341661.0 3.13512e-06 0.00144266 0.525181 0.998109
## pathway pval padj log2err ES
## <char> <num> <num> <num> <num>
## 1: HALLMARK_ALLOGRAFT_REJECTION 1.07e-10 5.35e-09 0.839 0.486
## 2: HALLMARK_INFLAMMATORY_RESPONSE 2.28e-10 5.70e-09 0.827 0.474
## 3: HALLMARK_TNFA_SIGNALING_VIA_NFKB 2.12e-07 3.53e-06 0.690 0.425
## 4: HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION 6.27e-07 7.84e-06 0.659 0.414
## 5: HALLMARK_KRAS_SIGNALING_UP 1.74e-06 1.66e-05 0.644 0.420
## 6: HALLMARK_COAGULATION 1.99e-06 1.66e-05 0.627 0.458
## NES size leadingEdge
## <num> <int> <list>
## 1: 2.20 177 CD96, NL....
## 2: 2.16 191 CD14, NL....
## 3: 1.94 191 CCL20, S....
## 4: 1.88 190 THBS1, S....
## 5: 1.90 178 NAP1L2, ....
## 6: 1.98 126 THBS1, S....
#prepare data for plot
#mark top common gene sets
top10_common <- intersect(fgsea_hallmark$pathway[1:10],
fgsea_hallmark_knis$pathway[1:10])
#plot CPS data
hallmark_plot <- fgsea_hallmark |>
arrange(pval) |>
slice_head(n=10) |>
mutate(direction = as.character(ifelse(sign(NES) <0, "Up", "Down")),
is_top10 = pathway %in% top10_common,
pathway = as.character(pathway),
pathway_formatted = as.character(ifelse(is_top10, paste0("**", pathway, "**"), pathway)))
hallmark_plot$pathway_formatted <- factor(hallmark_plot$pathway_formatted,
levels = sort_by(unique(hallmark_plot$pathway_formatted),
rev(hallmark_plot$pval)))
hallmark <- hallmark_plot |>
ggplot(aes(x=-log10(pval), y=pathway_formatted, fill=direction)) +
geom_col(alpha=0.75) +
labs(
y = "",
x = expression(-log[10]*italic(P)),
fill = "Direction",
title = "Hallmark"
) +
theme_figures_border+
scale_y_discrete(expand = c(0.075,0.01))+
scale_x_continuous(expand = c(0.025,0))+
scale_fill_manual(values="lightcoral")
Fig5f <- hallmark +
theme(axis.text.y = element_markdown(color="black", size=8),
plot.margin = margin(t=2, l=1, b=1, r=1, unit = "cm"))
Fig5f
#prepare data for plot
#combine DEres
DEres_comb <- cbind(rbind(DEres_cps, DEres_knis),
source=c(rep("CPS1000", nrow(DEres_cps)), rep("Knisbacher",nrow(DEres_knis))))
#define significant genes
DEres_comb$sign <- ifelse((DEres_comb$padj < 0.1 &
abs(DEres_comb$log2FoldChange) > 2), TRUE, FALSE)
DEres_comb$source_sign <- ifelse(DEres_comb$sign == TRUE, DEres_comb$source, "non_sign")
#prepare annotation
colors_volcano <- setNames(c("lightgrey", "red", "lightblue"), unique(DEres_comb$source_sig))
#annotate significant genes in top hallmark genesets and map to genes
sets_hallmark <- msigdbr(species = "Homo sapiens", collection = "H")
gene_to_hallmark <- sets_hallmark |>
group_by(gene_symbol) |>
summarize(hallmark_pathways = paste(gs_name, collapse = "; ")) |>
ungroup()
DEres_comb <- DEres_comb |>
left_join(gene_to_hallmark, by = c("external_gene_name" = "gene_symbol"))
#plot
set.seed(123)
volcano <- DEres_comb |>
ggplot(aes(x = log2FoldChange, y = -log10(pvalue))) +
geom_point(aes(color=source_sign), size=1, alpha = 0.7) +
geom_label_repel(
aes(label = ifelse(
(str_detect(hallmark_pathways,
"HALLMARK_TNFA_SIGNALING_VIA_NFKB|HALLMARK_INFLAMMATORY_RESPONSE")) &
source_sign != "non_sign",
external_gene_name,
NA
), fill=source_sign, segment.color=source_sign),
color="black",
box.padding = 0.4,
max.overlaps = Inf,
size=3,
point.padding = 0.2,
label.size = 0.25,
min.segment.length = 0
)+
theme_figures+
labs(
title = "",
x = expression(log[2]~FC),
y = expression(-log[10]*italic(P)),
color = "Dataset"
) +
scale_color_manual(values = colors_volcano,
labels=c("CPS1000", "CLL-map Portal", "not significant"))+
scale_fill_manual(values = colors_volcano,
labels=c("CPS1000", "CLL-map Portal", "not significant"),
guide = "none")+
scale_discrete_manual("segment.color", values = colors_volcano, guide = "none")+
scale_x_continuous(limits=c(-10,10))+
annotate("text", x=5, y=8, label = "Down in DDX3X\nmutants", size = 3)+
annotate("text", x=-5, y=8, label = "Up in DDX3X\nmutants", size = 3) +
guides(color = guide_legend(override.aes = list(size = 2)))
Fig5g <- volcano + theme(plot.margin = margin(t=1.5, l=1, b=1.5, unit = "cm"))
Fig5g
## Warning: Removed 31720 rows containing missing values or values outside the scale range
## (`geom_label_repel()`).
#multipanel Fig.5
Fig5b <- Fig5b_top/Fig5b_bottom + plot_layout(ncol=1, guides = 'collect') & theme(legend.position='right',
plot.margin=margin(t=0.5, b=0.5, l=0.5, unit="cm"))
col1 <- plot_grid(Fig5a, Fig5c, Fig5d, Fig5e,
align = "v", axis = c("l", "r"), ncol = 1, rel_heights = c(0.2, 0.12, 0.25, 0.23),
labels=c("A", "C", "D", "E"), label_size=14)
col2 <- plot_grid(Fig5b, Fig5f, Fig5g, NULL,
align = "v", axis = c("r"),
ncol = 1, rel_heights = c(0.5, 0.2, 0.25, 0.05),
labels=c("B", "F", "G", ""), label_size=14)
## Warning: Removed 31720 rows containing missing values or values outside the scale range
## (`geom_label_repel()`).
Fig5 <- plot_grid(col1, col2, ncol=2, rel_widths = c(0.6, 0.4)) & theme(plot.margin=margin(l=1,t=1,b=1,r=1, unit="cm"))
ggsave(path=(paste0(filepath,"figures")), filename = "Fig5.svg", width=42, height=59.4, units = "cm")
## Warning: ggrepel: 50 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 55 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
ggsave(path=(paste0(filepath,"figures")), filename = "Fig5.pdf", width=42, height=59.4, units = "cm")
## Warning: ggrepel: 50 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 56 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
ggsave(path=(paste0(filepath,"figures")), filename = "Fig5.png", width=42, height=59.4, units = "cm", dpi=600) #remove later
## Warning: ggrepel: 50 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 55 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
#prepare data, use all samples, limit to CLL
treatments_cll <- treatments |>
filter(patientID %in% screen_long[screen_long$diagnosis == "CLL",]$patientID)
#subtract suffixes from sampleID
treatments_cll$sample1 <- gsub("-[1234]$", "", treatments_cll$sample1)
treatments_cll$sample2 <- gsub("-[1234]$", "", treatments_cll$sample2)
# merge tables, limit to CLL samples
merged <- left_join(samples, screen_allSamples[, !colnames(screen_allSamples) %in% "patientID"], by = "sampleID") |>
filter(patientID %in% screen_long[screen_long$diagnosis == "CLL",]$patientID)
#add date of sampling
#subtract suffixes from sampleID
merged$sampleID <- gsub("-[1234]$", "", merged$sampleID)
#verify sampleID is unique
length(unique(merged$sampleID)) == nrow(merged)
## [1] TRUE
merged_date <- left_join(merged, survival[, colnames(survival) %in% c("sampleID", "sampleDate")], by = "sampleID") |>
relocate(sampleDate, .after = sampleID)
#check NA
sum(is.na(merged_date$sampleDate))
## [1] 0
#transfrom in long data frame
merged_table_long <- merged_date |>
pivot_longer(cols = c(4:length(merged_date)), names_to = "drug", values_to = "viability")
#add concentration level and remove last two characters from each string in 'drug' column
merged_table_long <- merged_table_long |>
mutate(conc_index = as.numeric(str_extract(drug, "\\d+$")),
drug = str_replace(drug, "_\\d+$", ""))
#create means
merged_table_means <- merged_table_long |>
group_by(patientID, sampleID, sampleDate, drug) |>
summarize(mean_viab=mean(viability))
## `summarise()` has grouped output by 'patientID', 'sampleID', 'sampleDate'. You
## can override using the `.groups` argument.
#convert to wide format
longitudinal_wide <- merged_table_means |>
pivot_wider(names_from = drug, values_from = mean_viab)
#calculate pair-wise viability differences
differences_long <- create_mean_differences(treatments, longitudinal_wide)
#summary of sample pairs with treatment annotation and ex vivo viability data
summary_table <- differences_long |>
group_by(therapy) |>
summarize(sample_pairs = n()/63,
patients = n_distinct(patientID),
unique_samples = n_distinct(c(sample1, sample2)),
unique_sample1 = n_distinct(sample1),
unique_sample2 = n_distinct(sample2)) |>
bind_rows(differences_long |>
summarise(therapy = "overall",
sample_pairs = n()/63,
patients = n_distinct(patientID),
unique_samples = n_distinct(c(sample1, sample2)),
unique_sample1 = n_distinct(sample1),
unique_sample2 = n_distinct(sample2)))
summary_table
## # A tibble: 4 × 6
## therapy sample_pairs patients unique_samples unique_sample1 unique_sample2
## <chr> <dbl> <int> <int> <int> <int>
## 1 chemo 47 40 87 41 46
## 2 none 65 65 130 65 65
## 3 targeted 20 19 39 19 20
## 4 overall 132 110 242 111 131
#date intervals of samples
df_date <- differences_long |>
filter(drug == "Ibrutinib") |>
dplyr::select("patientID", "pair", "sample1", "sample2") |>
merge(merged_date[, colnames(merged_date) %in% c("sampleID", "sampleDate")], by.x = "sample1", by.y ="sampleID") |>
rename(sampleDate1 = sampleDate)
df_date <- df_date |>
rename(sampleID = sample2) |>
left_join(merged_date[, colnames(merged_date) %in% c("sampleID", "sampleDate")], by ="sampleID") |>
rename(sampleDate2 = sampleDate, sample2 = sampleID) |>
mutate(diff_date = sampleDate2 - sampleDate1)
df_date$diff_date <- as.numeric(df_date$diff_date)
summary(df_date$diff_date)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 7 224 515 580 892 1757
#sample pairs with more than 0.1 change in viability
differences_long |>
filter(abs(difference) > 0.1) |>
nrow()/nrow(differences_long)*100
## [1] 4.85
#plot
long_distr <- differences_long |>
ggplot(aes(difference))+
geom_histogram(color="white", fill="grey40", alpha=0.5,bins = 50)+
geom_vline(xintercept = 0, linetype = "dashed", color = "black")+
labs(x="Difference in mean viability", y="Count")+
theme_figures+
scale_y_continuous(expand = c(0.02,0))+
scale_x_continuous(expand = c(0.02,0), limits=c(-0.3, 0.3))
Fig6a <- long_distr + theme(plot.margin = margin(t=1, b=1, l=1, unit = "cm"))
Fig6a
## Warning: Removed 129 rows containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_bar()`).
## Warning: There were 2 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `timepoint = as.numeric(timepoint)`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
## Warning in geom_segment(aes(x = 0.5, y = 6, xend = 3, yend = 6), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## Warning in geom_segment(aes(x = 0, y = 6, xend = 0.5, yend = 6), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## Warning in geom_segment(aes(x = 0, y = 5, xend = 3, yend = 5), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## Warning in geom_segment(aes(x = 1.8, y = 4, xend = 3, yend = 4), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## Warning in geom_segment(aes(x = 1.2, y = 4, xend = 1.8, yend = 4), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## Warning in geom_segment(aes(x = 0, y = 4, xend = 1.2, yend = 4), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## Warning in geom_segment(aes(x = 1.5, y = 3, xend = 3, yend = 3), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## Warning in geom_segment(aes(x = 0, y = 3, xend = 1.5, yend = 3), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## Warning in geom_segment(aes(x = 0.8, y = 2, xend = 3, yend = 2), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## Warning in geom_segment(aes(x = 0, y = 2, xend = 0.8, yend = 2), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## Warning in geom_segment(aes(x = 1.5, y = 1, xend = 3, yend = 1), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## Warning in geom_segment(aes(x = 0, y = 1, xend = 1.5, yend = 1), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## Warning in geom_segment(aes(x = 1, y = 0.5, xend = 1, yend = 6.5), color = "grey", : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## Warning in geom_segment(aes(x = 2, y = 0.5, xend = 2, yend = 6.5), color = "grey", : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
#use individual drug responses
merged_table_wide <- merged_table_long |>
pivot_wider(names_from = drug, values_from = viability)
#calculate pair-wise viability differences
differences_long_allconc <- create_conc_differences(treatments, merged_table_wide)
## Warning in left_join(., viab_df, by = c(sample1 = "sampleID")): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1 of `x` matches multiple rows in `y`.
## ℹ Row 71 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.
## Warning in left_join(., viab_df, by = c(sample2 = "sampleID")): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1 of `x` matches multiple rows in `y`.
## ℹ Row 11 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.
#add concentrations
differences_long_allconc <- differences_long_allconc |>
left_join(screen_long |>
distinct(drug, conc_index, concentration), by = c("drug", "conc_index")) |>
drop_na(conc_index)
#t-test with FDR within status and therapy
df_ttest_allconc <- differences_long_allconc |>
group_by(drug, conc_index, status, therapy) |>
mutate(pval = t.test(sample1_value, sample2_value, paired = TRUE)$p.value) |>
ungroup() |>
group_by(status, therapy) |>
mutate(padj = p.adjust(pval, method="BH"))
#order drugs by their association similarity (hierarchical clustering)
pMat <- df_ttest_allconc |>
filter(padj < 0.1) |>
group_by(drug, conc_index, therapy, status) |>
mutate(sign_log_pval = -log10(mean(pval)) * sign(mean(percent_change))) |>
ungroup() |>
dplyr::select(drug, conc_index, therapy, status, sign_log_pval) |>
# Create a unique identifier for each condition
unite("condition", therapy, status, conc_index, sep = "_") |>
pivot_wider(names_from = condition,
values_from = sign_log_pval,
values_fn = median) |>
as.data.frame() |>
column_to_rownames("drug") |>
as.matrix()
#replace NAs with 0 (no significant effect)
pMat[is.na(pMat)] <- 0
#perform hierarchical clustering
set.seed(123)
drug_clusters <- hclust(dist(pMat), method = "ward.D2")
drug_order <- rownames(pMat)[drug_clusters$order]
df_ttest_allconc$drug <- factor(df_ttest_allconc$drug, levels = drug_order)
#rename variables
df_ttest_allconc$therapy <- recode(df_ttest_allconc$therapy,
"chemo" = "Chemo-Immuno",
"targeted" = "Targeted",
"none" = "None")
#plot heatmap
pval_hmap <- df_ttest_allconc |>
filter(padj <0.1) |>
group_by(drug, conc_index, therapy, status) |>
mutate(sign_log_pval = -log10(mean(pval)) * sign(mean(percent_change))) |>
ggplot(aes(x=conc_index, y=drug, fill=sign_log_pval))+
geom_tile(size = 0.2, color = "white")+
facet_wrap(therapy~status, ncol=3)+
scale_fill_gradient2(low="blue", high="red", limits=c(-4, 4))+
labs(y="", x="Concentration index", fill = "**-log<sub>10</sub>*P*<br>with direction**") +
theme_figures_facet_angle45_x_legend+
theme(axis.text.x = element_text(angle = 0, hjust = 0.5, vjust = 0),
legend.title = ggtext::element_markdown())
Fig6c <- pval_hmap + theme(plot.margin = margin(l=1, t=1, b=0.5, unit = "cm"))
Fig6c
#create list of sign. drugs in targeted, start between
targ_list <- df_ttest_allconc |>
filter(padj <0.1, therapy == "Targeted", status == "start between") |>
group_by(drug) |>
mutate(median_diff = median(difference)) |>
arrange(median_diff)
targ_list$drug <- factor(targ_list$drug, levels = unique(targ_list$drug))
#use differences in mean viability
median_ranking_targeted <- differences_long |>
filter(drug %in% unique(targ_list$drug), therapy == "targeted", status == "start between") |>
group_by(drug) |>
mutate(median_diff = median(difference)) |>
arrange(median_diff)
median_ranking_order <- unique(median_ranking_targeted$drug)
#prepare pathway labeling
pathway_annotation <- differences_long |>
filter(drug %in% unique(targ_list$drug),
therapy == "targeted",
status == "start between") |>
mutate(drug = factor(drug, levels = median_ranking_order)) |>
merge(drugs[,c("Drug", "Pathway_mod2")], by.x="drug", by.y="Drug")
median_ranking_order <- unique(median_ranking_targeted$drug)
pathway_annotation$Pathway_mod2 <- factor(pathway_annotation$Pathway_mod2, levels = order_pway2)
#plot
long_mean_viab <- pathway_annotation |>
ggplot(aes(y = difference, x = drug, fill=Pathway_mod2)) +
geom_boxplot(alpha = 0.8, outliers = TRUE) +
geom_hline(yintercept = 0, color = "black", linetype = "dashed", linewidth =0.5) +
scale_fill_manual(values=colors_pathways_mod2)+
theme_figures_angle45_x+
labs(fill = "Pathway", y = "Difference in mean viability", x = "")
Fig6d <- long_mean_viab + theme(plot.margin = margin(t=1, l=1, unit = "cm"))
Fig6d
#plot OTX015 and TW-37
otx_tw <- differences_long_allconc |>
mutate(drug = case_when(drug == "OTX015" ~ "OTX015 (BET)",
drug == "TW-37" ~ "TW-37 (BCL2/MCL1)",
TRUE ~ drug)) |>
pivot_longer(cols = c(sample1_value, sample2_value), names_to = "timepoint", values_to = "viability") |>
filter(drug %in% c("OTX015 (BET)","TW-37 (BCL2/MCL1)"), status == "start between", therapy == "targeted") |>
ggplot(aes(x=factor(concentration), y=viability, color=timepoint))+
geom_boxplot(alpha = 0.3, width = 0.5, position=position_dodge(0.75), outlier.shape = NA)+
geom_point(aes(color = timepoint), alpha = 0.25,
position = position_jitterdodge(dodge.width = 0.8, jitter.width = 0.2))+
labs(y = "Viability", color= "Sampling")+
facet_wrap(~drug, ncol=2) +
stat_compare_means(aes(group = timepoint),
method = "t.test",
paired = TRUE,
label = "p.signif",
label.y = 1.2,
size=3)+
labs(x = bquote("Concentration ("*mu*"M)"))+
scale_y_continuous(breaks=seq(0,1.0, 0.2), limits=c(0,1.38)) +
scale_color_manual(labels = c("sample1_value" = "Before treatment",
"sample2_value" = "On treatment"),
values = colors_treatment)+
scale_x_discrete(labels = c("0.016", "0.08", "0.4", "2", "10"))+
guides(color = guide_legend(override.aes = list(label = c(""))))+
theme_figures_facet_angle45_x_legend
Fig6e <- otx_tw + theme(plot.margin = margin(l=1, t=1, unit = "cm"))
Fig6e
#plot BCR inhibitors
bcr <- differences_long_allconc |>
pivot_longer(cols = c(sample1_value, sample2_value), names_to = "timepoint", values_to = "viability") |>
filter(drug %in% c("Dasatinib","Ganetespib", "Idelalisib", "AZD8055", "Cobimetinib", "Ibrutinib"),
status == "start between", therapy == "targeted") |>
mutate(drug = case_when(drug == "AZD8055" ~ "AZD8055 (mTOR)",
drug == "Cobimetinib" ~ "Cobimetinib (MEK)",
drug == "Dasatinib" ~ "Dasatinib (ABL/LYN)",
drug == "Idelalisib" ~ "Idelalisib (PI3K)",
drug == "Ganetespib" ~ "Ganetespib (HSP90)",
drug == "Ibrutinib" ~ "Ibrutinib (BTK)",
TRUE ~ drug)) |>
ggplot(aes(x=factor(concentration), y=viability, color=timepoint))+
geom_boxplot(alpha = 1, width = 0.5, position=position_dodge(0.75), outlier.shape = NA) +
geom_jitter(alpha = 0.3, position = position_jitterdodge(dodge.width = 0.75, jitter.width = 0.1)) +
theme_figures_facet_angle45_x_legend+
scale_y_continuous(breaks=seq(0,1.0, 0.2), limits=c(0,1.2)) +
guides(color = guide_legend(override.aes = list(label = ""))) +
facet_wrap(~drug)+
stat_compare_means(aes(group = timepoint),
method = "t.test",
paired = TRUE,
label = "p.signif",
label.y = 1.1,
size=3)+
labs(y = "Viability", color= "Sampling", x = bquote("Concentration ("*mu*"M)"))+
scale_color_manual(labels = c("sample1_value" = "Before treatment",
"sample2_value" = "On treatment"),
values = colors_treatment)+
scale_x_discrete(labels = c("0.016", "0.08", "0.4", "2", "10"))+
guides(color = guide_legend(override.aes = list(label = c(""))))
SFig12c <- bcr + theme(plot.margin = margin(t=1, l=1, unit = "cm"))
#plot MCL1 and BCL2 inhibitors
mcl_bcl <- differences_long_allconc |>
pivot_longer(cols = c(sample1_value, sample2_value), names_to = "timepoint", values_to = "viability") |>
filter(drug %in% c("Venetoclax","A.1210477"), status == "start between", therapy == "targeted") |>
mutate(drug = case_when(drug == "Venetoclax" ~ "Venetoclax (BCL2)",
drug == "A.1210477" ~ "A.1210477 (MCL1)",
TRUE ~ drug)) |>
ggplot(aes(x=factor(concentration), y=viability, color=timepoint))+
geom_boxplot(alpha = 1, width = 0.5, position=position_dodge(0.75), outlier.shape = NA) +
geom_jitter(alpha = 0.3, position = position_jitterdodge(dodge.width = 0.75, jitter.width = 0.1)) +
theme_figures_facet_angle45_x_legend+
scale_y_continuous(breaks=seq(0,1.0, 0.2), limits=c(0,1.2)) +
guides(color = guide_legend(override.aes = list(label = ""))) +
facet_wrap(~drug, nrow=2)+
stat_compare_means(aes(group = timepoint),
method = "t.test",
paired = TRUE,
label = "p.signif",
label.y = 1.1,
size=3)+
labs(y = "Viability", color= "Sampling", x = bquote("Concentration ("*mu*"M)"))+
scale_color_manual(labels = c("sample1_value" = "Before treatment",
"sample2_value" = "On treatment"),
values = colors_treatment)+
scale_x_discrete(labels = c("0.016", "0.08", "0.4", "2", "10"))+
guides(color = guide_legend(override.aes = list(label = c(""))))
SFig12d <- mcl_bcl + theme(plot.margin = margin(t=1, l=1, r=1, unit = "cm"))
#multipanel
col1 <- plot_grid(Fig6a, Fig6b,
align = "v", axis = c("r"),
ncol = 1, rel_heights = c(0.4, 0.4),
labels=c("A", "B"), label_size=14)
## Warning: Removed 129 rows containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_bar()`).
## Warning in geom_segment(aes(x = 0.5, y = 6, xend = 3, yend = 6), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## Warning in geom_segment(aes(x = 0, y = 6, xend = 0.5, yend = 6), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## Warning in geom_segment(aes(x = 0, y = 5, xend = 3, yend = 5), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## Warning in geom_segment(aes(x = 1.8, y = 4, xend = 3, yend = 4), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## Warning in geom_segment(aes(x = 1.2, y = 4, xend = 1.8, yend = 4), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## Warning in geom_segment(aes(x = 0, y = 4, xend = 1.2, yend = 4), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## Warning in geom_segment(aes(x = 1.5, y = 3, xend = 3, yend = 3), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## Warning in geom_segment(aes(x = 0, y = 3, xend = 1.5, yend = 3), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## Warning in geom_segment(aes(x = 0.8, y = 2, xend = 3, yend = 2), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## Warning in geom_segment(aes(x = 0, y = 2, xend = 0.8, yend = 2), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## Warning in geom_segment(aes(x = 1.5, y = 1, xend = 3, yend = 1), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## Warning in geom_segment(aes(x = 0, y = 1, xend = 1.5, yend = 1), arrow = arrow(length = unit(0.15, : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## Warning in geom_segment(aes(x = 1, y = 0.5, xend = 1, yend = 6.5), color = "grey", : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## Warning in geom_segment(aes(x = 2, y = 0.5, xend = 2, yend = 6.5), color = "grey", : All aesthetics have length 1, but the data has 12 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
col2 <- plot_grid(Fig6c, ncol = 1,
#rel_heights = c(0.8, 0.2),
labels=c("C", ""), label_size=14)
col3 <- plot_grid(Fig6d,Fig6e, ncol = 1, align = "v", axis = c("l", "r"),
rel_heights = c(0.55, 0.45),
labels=c("D", "E"), label_size=14)
top <- plot_grid(col1, col2, col3, ncol=3,
align = "h", axis = c("t", "b"),
rel_widths = c(0.2, 0.35, 0.5))
Fig6 <- plot_grid(top, NULL, nrow =2, rel_heights = c(0.25, 0.75)) & theme(plot.margin=margin(r=1,l=1,t=1,b=1,unit="cm"))
ggsave(path=(paste0(filepath,"figures")), filename = "Fig6.svg", width=42, height=59.4, units = "cm")
ggsave(path=(paste0(filepath,"figures")), filename = "Fig6.pdf", width=42, height=59.4, units = "cm")
ggsave(path=(paste0(filepath,"figures")), filename = "Fig6.png", width=42, height=59.4, units = "cm", dpi=600)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `crr = as.numeric(crr)`.
## Caused by warning:
## ! NAs introduced by coercion
## `summarise()` has grouped output by 'patientID', 'drug'. You can override using
## the `.groups` argument.
## `summarise()` has grouped output by 'diagnosis', 'patientID'. You can override
## using the `.groups` argument.
## `summarise()` has grouped output by 'diagnosis'. You can override using the
## `.groups` argument.
#multipanel
Fig7a <- p1+p2 + plot_annotation("CLL", theme = theme(
plot.margin = margin(t=1, b=1, l=1, unit = "cm"),
plot.title = element_text(hjust=0.05, size=8, face="bold"))) +
plot_layout(guides = "collect")
Fig7a
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 34 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 34 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: ggrepel: 6 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
Fig7b <- p3+p4 + plot_annotation("AML", theme = theme(
plot.margin = margin(t=1, b=1, l=1, unit = "cm"),
plot.title = element_text(hjust=0.05, size=8, face="bold"))) +
plot_layout(guides = "collect")
Fig7b
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`sta_corr()`).
## Warning: Removed 34 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 34 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 3 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 6 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning in geom_segment(aes(x = ic50_ven$ic50[ic50_ven$diagnosis == "CLL"], : All aesthetics have length 1, but the data has 5 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## Warning in geom_segment(aes(x = ic50_ven$ic50[ic50_ven$diagnosis == "CLL"] - : All aesthetics have length 1, but the data has 5 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
#create boxplots for correlations coefficients of drug metrics in AML and CLL
#function to calculate correlations
compute_cor <- function(data, parameters, outcome, diagnosis) {
cor_data <- sapply(parameters, function(par) {
cor(data[[outcome]], data[[par]], use = "pairwise.complete.obs", method = "pearson")
})
data.frame(
parameter = parameters,
cor = cor_data,
diagnosis = diagnosis,
outcome = outcome
)
}
parm<- c("HS_mean", "Einf_mean", "pIC50_mean", "R2_mean", "AUC_mean", "viab_mean")
CLL_ORR <- compute_cor(CLL_outcomes, parm, "orr", "CLL")
CLL_CRR <- compute_cor(CLL_outcomes, parm, "crr", "CLL")
AML_ORR <- compute_cor(AML_outcomes, parm, "orr", "AML")
AML_CRR <- compute_cor(AML_outcomes, parm, "crr", "AML")
cor_combined <- bind_rows(CLL_ORR, CLL_CRR, AML_ORR, AML_CRR)
#plot
par_list <- cor_combined |>
filter(parameter != "pF_mean", diagnosis == "CLL", outcome == "crr") |>
arrange(abs(cor)) |>
pull(parameter)
cor_combined$parameter <- factor(cor_combined$parameter, levels = par_list)
Fig7d <- cor_combined |>
filter(str_detect(parameter, "mean")) |>
mutate(direction = ifelse(sign(cor) >0, "positive", "negative")) |>
filter(parameter != "pF_mean") |>
ggplot(aes(y=parameter, x=abs(cor), fill=diagnosis))+
geom_col(aes(alpha=outcome), position = position_dodge(width=0.8), width=0.8)+
facet_wrap(~diagnosis, ncol=2)+
labs(title="", y="Parameter (mean)", x="Absolute Pearson correlation coefficient",
fill = "Diagnosis", alpha="Outcome")+
theme_figures_facet_angle60_x+
scale_fill_manual(values=colors_diag, guide="none")+
scale_alpha_manual(values=c(1, 0.5), labels = c("CRR", "ORR"))+
xlim(0,1)+
scale_y_discrete(labels = c("Hill coefficient",
expression(pIC[50]), # subscript
expression(E[inf]), # subscript
expression(R^2), # superscript
"AUC",
"Mean viability"))
Fig7d <- Fig7d + theme(plot.margin = margin(t=1, b=1, l=1, unit = "cm"))
Fig7d
#prepare data: use mean viability in CLL
screen_means_cll <- screen_means |>
filter(diagnosis == "CLL")
#merge with survival data, add age
surv_combined <- left_join(
screen_means_cll,
screen_long |> distinct(patientID, sampleID), # Remove duplicates
by = "patientID"
) |>
left_join(survival |> rename(patientID = patID),
by = c("sampleID", "patientID")) |>
left_join(age |> dplyr::select(-sampleDate),
by = c("sampleID", "patientID"))
#merge with genomic aberrations
surv_combined <- left_join(surv_combined, metadata |>
dplyr::select(-diagnosis, -treatment), by = c("patientID" = "Patient.ID")) |>
mutate(IGHV.status = case_when(IGHV.status == "M" ~ "1",
IGHV.status == "U" ~ "0",
TRUE ~ IGHV.status))
#summary
length(unique(surv_combined$patientID))
## [1] 275
surv_combined |>
group_by(treatedAfter) |>
summarize(n=n()/63)
## # A tibble: 3 × 2
## treatedAfter n
## <lgl> <dbl>
## 1 FALSE 145
## 2 TRUE 99
## 3 NA 31
#perform cox regression for each drug and clinical feature
#univariate analysis for OS: drugs, stratified by IGHV
resOS_uni <- filter(surv_combined, !is.na(OS), !is.na(IGHV.status)) %>%
group_by(drug, IGHV.status) %>%
do(com_uni(.$mean_viab, .$OS, .$died, TRUE)) %>% ungroup() %>%
arrange(p) %>% mutate(p.adj = p.adjust(p, method = "BH")) %>%
mutate(Endpoint = "OS")
##univariate analysis for TTT: drugs, stratified by IGHV
resTTT_uni <- filter(surv_combined, !is.na(TTT), !is.na(IGHV.status)) %>%
group_by(drug, IGHV.status) %>%
do(com_uni(.$mean_viab, .$TTT, .$treatedAfter, TRUE)) %>% ungroup() %>%
arrange(p) %>% mutate(p.adj = p.adjust(p, method = "BH")) %>%
mutate(Endpoint = "TTT")
#plot Hazard ratios and p values
plotTab_uni <- bind_rows(resOS_uni, resTTT_uni) %>%
filter(drug %in% unique(filter(.,p.adj < 0.05)$drug))
cox_uni_strat <- plotTab_uni |>
mutate(IGHV.status = ifelse(IGHV.status == "1", "M-CLL", "U-CLL")) |>
ggplot(aes(x=drug, y = HR, col = Endpoint, dodge = Endpoint)) +
geom_hline(yintercept = 1, linetype = "dotted") +
geom_point(position = position_dodge(width=0.75)) +
geom_errorbar(position = position_dodge(width =0.75),
aes(ymin = lower, ymax = higher), width = 0.3) +
geom_text(position = position_dodge(width =0.75), size = 3,
aes(y = higher + 0.6,
label = ifelse(p < 0.001,
"italic(P)<0.001",
paste0("italic(P)==", sprintf("%1.3f", p)))),
parse = TRUE) +
facet_wrap(~IGHV.status, scales = "free") +
labs(y="Hazard ratio", x="")+
coord_flip() +
theme_figures_facet+
scale_color_manual(values=colors_cox)+
guides(color = guide_legend(override.aes = aes(label = "")))+
ylim(0,4)
SFig13j <- cox_uni_strat + theme(plot.margin = margin(t=1, l=1, r=1, unit = "cm"))
SFig13j
#multivariate analysis for OS, combine del17p and TP53
resOS <- surv_combined |>
mutate(TP53_del17p = ifelse(TP53==1|del17p==1, 1,0)) |>
filter(!is.na(OS), !is.na(IGHV.status), !is.na(TP53_del17p), !is.na(age)) %>%
group_by(drug) %>%
do(com(.$mean_viab, .$IGHV.status, .$TP53_del17p, .$age, .$OS, .$died, TRUE)) %>% ungroup() %>%
arrange(p) %>% mutate(p.adj = p.adjust(p, method = "BH")) %>%
mutate(Endpoint = "OS")
#multivariate analysis for TTT, combine del17p and TP53
resTTT <- surv_combined |>
mutate(TP53_del17p = ifelse(TP53==1|del17p==1, 1,0)) |>
filter(!is.na(TTT), !is.na(IGHV.status), !is.na(TP53_del17p), !is.na(age)) %>%
group_by(drug) %>%
do(com(.$mean_viab, .$IGHV.status, .$TP53_del17p,.$age,.$TTT, .$treatedAfter, TRUE)) %>% ungroup() %>%
arrange(p) %>% mutate(p.adj = p.adjust(p, method = "BH")) %>%
mutate(Endpoint = "TTT")
#plot Hazard ratios and p values for ibrutinib
plotTab <- bind_rows(resOS, resTTT) %>%
filter(drug %in% unique(filter(.,p.adj < 0.05)$drug))
multi_cox_ibr <- plotTab |>
filter(drug == "Ibrutinib") |>
ggplot(aes(x=variable, y = HR, col = Endpoint)) +
geom_hline(yintercept = 1, linetype = "dotted") +
geom_point(position = position_dodge(width=0.75)) +
geom_errorbar(position = position_dodge(width =0.75),
aes(ymin = lower, ymax = higher), width = 0.3) +
geom_text(position = position_dodge(width =0.75), size = 3,
aes(y = higher + 0.6,
label = paste0("italic(P)==", sprintf("%1.3f", p))),
parse = TRUE) +
labs(y="Hazard ratio", x="")+
coord_flip() +
theme_figures+
scale_x_discrete(labels = c("TP53_del17p" = "TP53/del(17p)",
"response" = "Ex vivo\nresistance to\nibrutinib",
"ighv" = "IGHV",
"age" = "Age at\nsampling"))+
scale_color_manual(values=colors_cox)+
guides(color = guide_legend(override.aes = aes(label = "")))+
ylim(0,6)
Fig7e <- multi_cox_ibr + theme(plot.margin = margin(t=2, b=1, l=1, r=1, unit = "cm"))
Fig7e
#survival curves
#maxstat
km_surv <- surv_combined |>
filter(!is.na(IGHV.status), drug == "Ibrutinib")
#OS
res.cut.os <- surv_cutpoint(km_surv, time = "OS", event = "died",
variables = c("mean_viab"))
summary(res.cut.os)
## cutpoint statistic
## mean_viab 0.825 4.55
os_value <- summary(res.cut.os)$cutpoint
res.cat.os <- surv_categorize(res.cut.os)
fit_os <- survfit(Surv(OS, died) ~ mean_viab, data = res.cat.os)
#TTT
res.cut.ttt <- surv_cutpoint(km_surv, time = "TTT", event = "treatment",
variables = c("mean_viab"))
summary(res.cut.ttt)
## cutpoint statistic
## mean_viab 0.895 6.15
ttt_value <- summary(res.cut.ttt)$cutpoint
res.cat.ttt <- surv_categorize(res.cut.ttt)
fit_ttt <- survfit(Surv(TTT, treatment) ~ mean_viab, data = res.cat.ttt)
#plot OS Kaplan-meier curve based on cut-off
km_surv <- km_surv |>
mutate(ibr_resistance_os = ifelse(mean_viab < os_value, 0, 1),
ibr_resistance_ttt = ifelse(mean_viab < ttt_value, 0, 1))
os <- survfit2(Surv(OS, died) ~ ibr_resistance_os+IGHV.status, data = km_surv)
pval <- sub("^p", "", survfit2_p(os))
km_os <- os |>
ggsurvfit() +
labs(x = "Years", y = "Overall survival", title = "Overall survival") +
add_censor_mark(size = 1, alpha = 0.5) +
scale_ggsurvfit() +
labs(x="Years from sampling")+
theme_figures+
theme(legend.position = "bottom")+
scale_color_manual(labels=c("U-CLL, low ibrutinib viability", "U-CLL, high ibrutinib viability",
"M-CLL, low ibrutinib viability", "M-CLL, high ibrutinib viability"),
values = c("#CA054D", "#B96D40", "#A4D4B4", "#3B1C32"))+
annotate(
"text",
x = 2,
y = 0.05,
label = paste0("'Log-rank'~italic(P)", pval),
parse = TRUE,
hjust = 1,
vjust = 0,
size = 3
)+
guides(color = guide_legend(ncol = 2))
Fig7d1 <- km_os
#plot TTT Kaplan-meier curve based on cut-off
ttt <- survfit2(Surv(TTT) ~ ibr_resistance_ttt+IGHV.status, data = km_surv)
pval_ttt <- sub("^p", "", survfit2_p(ttt))
km_ttt <- ttt |>
ggsurvfit() +
labs(x = "Years", y = "Treatment-free survival", title = "Time to treatment") +
add_censor_mark(size = 1, alpha = 0.5) +
scale_ggsurvfit() +
labs(x="Years from sampling")+
theme_figures+
theme(legend.position = "bottom")+
scale_color_manual(labels=c("U-CLL, low ibrutinib viability", "U-CLL, high ibrutinib viability",
"M-CLL, low ibrutinib viability", "M-CLL, high ibrutinib viability"),
values = c("#CA054D", "#B96D40", "#A4D4B4", "#3B1C32"))+
annotate(
"text",
x = 2,
y = 0.05,
label = paste0("'Log-rank'~italic(P)", pval_ttt),
parse = TRUE,
hjust = 1,
vjust = 0,
size = 3
)+
guides(color = guide_legend(ncol = 2))
Fig7d2 <- km_ttt
#combine
Fig7f <- (Fig7d1/Fig7d2) + plot_layout(ncol=1, guides = 'collect') & theme(legend.position='bottom',
plot.margin = margin(t=1, b=1, l=1, r=1, unit = "cm"))
Fig7f
#multipanel
col1row3 <- plot_grid(Fig7c, Fig7d, NULL,
align = "h", axis = c("t", "b"), ncol = 3, rel_widths = c(0.4, 0.5, 0.1),
labels=c("", "D", ""), label_size=14)
## Warning in geom_segment(aes(x = ic50_ven$ic50[ic50_ven$diagnosis == "CLL"], : All aesthetics have length 1, but the data has 5 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## Warning in geom_segment(aes(x = ic50_ven$ic50[ic50_ven$diagnosis == "CLL"] - : All aesthetics have length 1, but the data has 5 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
col1 <- plot_grid(Fig7a, Fig7b, col1row3, NULL,
align = "v", axis = c("l", "r"), ncol = 1, rel_heights = c(0.2, 0.2, 0.15, 0.45),
labels=c("A", "B", "C"), label_size=14)
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 34 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 34 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`sta_corr()`).
## Warning: Removed 34 rows containing missing values or values outside the scale range
## (`geom_ribbon()`).
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 34 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text_repel()`).
col2 <- plot_grid(Fig7e, Fig7f, NULL,
align = "v", axis = c("l", "r"), ncol = 1, rel_heights = c(0.15, 0.35, 0.5),
labels=c("E", "F", ""), label_size=14)
Fig7 <- plot_grid(col1, col2, ncol =2, rel_widths = c(0.65, 0.35)) & theme(plot.margin=margin(r=1,l=1,t=1,b=1,unit="cm"))
ggsave(path=(paste0(filepath,"figures")), filename = "Fig7.svg", width=42, height=59.4, units = "cm")
## Warning: ggrepel: 4 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 2 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
ggsave(path=(paste0(filepath,"figures")), filename = "Fig7.pdf", width=42, height=59.4, units = "cm")
## Warning: ggrepel: 5 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 6 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
ggsave(path=(paste0(filepath,"figures")), filename = "Fig7.png", width=42, height=59.4, units = "cm", dpi=600)
## Warning: ggrepel: 4 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 2 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps